#include <exception>

#include <util/generic/size_literals.h>

#include <library/cpp/yson/node/node_io.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>
#include <robot/jupiter/protos/acceptance.pb.h>

#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/url.h>

#include "config.h"
#include "process_turbo_logs.h"
#include "field_names.h"

using namespace NJupiter;

namespace NWebmaster {
namespace NTurbo {

// Names of the attributes that store, per source directory, the name of the
// last source table that has already been processed (see FillInputTables and
// the SetAttr calls at the end of ProcessTurboLogs / ProcessTurboApiLogs).
const char* LAST_PROCESSED_SOURCE_ATTR = "lastProcessedSource";
const char* LAST_PROCESSED_FAST_SOURCE_ATTR = "lastProcessedFastSource";
const char* LAST_PROCESSED_YML_SOURCE_ATTR = "lastProcessedYmlSource";
// Attribute on the feeds history table holding the time (seconds) of the last
// full history reduce with cleaning.
const char* LAST_CLEANING_ATTR = "lastCleaning";
// NOTE(review): the three names below are not referenced in the part of the
// file visible here; presumably they are used by other code - confirm.
const char* LATEST_MERGED_TABLE_ATTR = "latest_merged_table";
const char* HISTORY_TABLE = "history";
const char* TURBO_TIMESTAMP_ATTR = "turbo_timestamp";

// Value of the action field that marks a row as a "postpone" record; for such
// rows TTurboLogsReducer reads events instead of errors.
const char* ACTION_POSTPONE = "postpone";

// Error code substituted when a raw error carries no code field.
const char* ERROR_UNKNOWN = "Unknown";

// Limits passed to TTurboLogsReducer for regular logs: maximum number of item
// previews kept per record and maximum number of samples kept per error code.
const size_t MAX_ITEMS = 10;
const size_t MAX_ERRORS = 10;
// Effectively "no limit"; passed for both limits when processing API logs.
const size_t UNLIMITED_MAX = std::numeric_limits<size_t>::max();
// History rows older than this (relative to the cleaning time) are dropped
// during a full history reduce.
const time_t MAX_FEED_HISTORY_AGE = 100 * 86400; // 100 days
// A full history reduce with cleaning is triggered when more than this much
// time has passed since the last one.
const time_t FEED_HISTORY_CLEANING_PERIOD = 86400 * 5; // 5 days

/**
 * Mapper that keeps only rows whose source field equals SourceFilter and
 * enriches them with two fields:
 *  - FIELD_TIMESTAMP: the integer parsed from the part of FIELD_HASH that
 *    precedes the first '#' (the whole hash when there is no '#');
 *  - FIELD_TYPE: the value registered in SourceMap for the input table index.
 */
struct TExtractTimestampMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(SourceFilter, SourceMap)

public:
    TExtractTimestampMapper() = default;

    /**
     * @param sourceFilter only rows with exactly this source value are emitted
     * @param sourceMap    input table index -> value written into FIELD_TYPE
     */
    TExtractTimestampMapper(const TString &sourceFilter, const TMap<size_t, TString> &sourceMap)
        : SourceFilter(sourceFilter)
        , SourceMap(sourceMap)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            const TString rowSource = NYTUtils::FromNodeOrDefault<TString>(input->GetRow()[FIELD_SOURCE], "");
            if (rowSource == SourceFilter) {
                NYT::TNode record = input->GetRow();
                // the hash has the form "<timestamp>#..."; take everything before '#'
                const TString &hashValue = record[FIELD_HASH].AsString();
                const size_t separatorPos = hashValue.find('#');
                const i64 timestamp = FromString<i64>(hashValue.substr(0, separatorPos));
                record(FIELD_TIMESTAMP, timestamp);
                // at() throws if the table index was never registered in SourceMap
                record(FIELD_TYPE, SourceMap.at(input->GetTableIndex()));
                output->AddRow(record);
            }
            input->Next();
        }
    }

public:
    TString SourceFilter;
    TMap<size_t, TString> SourceMap;
};

REGISTER_MAPPER(TExtractTimestampMapper)

/**
 * Reducer that folds all raw turbo log rows of one reduce key into a single
 * output record.
 *
 * The record carries the feed url, host, type, last access time, timestamp and
 * a FIELD_DATA map containing:
 *  - the hash and stats copied from the "total feed info" row (a row with an
 *    empty document url that is not a postpone action);
 *  - up to MaxItems items that have a non-empty saas key (preview);
 *  - a sorted list of errors, at most MaxErrors samples per error code.
 *
 * Nothing is emitted when the feed url is empty or when no "total feed info"
 * row was seen. When SearchValidFeedStats is set, items and errors of rows that
 * precede the first "total feed info" row are skipped.
 */
struct TTurboLogsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(MaxItems, MaxErrors, SearchValidFeedStats)

public:
    TTurboLogsReducer() = default;

    /**
     * @param maxItems             maximum number of items stored per record
     * @param maxErrors            maximum number of stored samples per error code
     * @param searchValidFeedStats skip rows until the "total feed info" row is found
     */
    TTurboLogsReducer(size_t maxItems, size_t maxErrors, bool searchValidFeedStats)
            : MaxItems(maxItems), MaxErrors(maxErrors), SearchValidFeedStats(searchValidFeedStats) {

    }

    void Do(TReader *input, TWriter *output) override {
        const TString feed = NYTUtils::FromNodeOrDefault<TString>(input->GetRow()[FIELD_FEED], "");
        if (feed.Empty()) {
            return;
        }
        const TString type = NYTUtils::FromNodeOrDefault<TString>(input->GetRow()[FIELD_TYPE], "");
        const TString host = NYTUtils::FromNodeOrDefault<TString>(input->GetRow()[FIELD_WMC_HOST], "");

        // collect information into one json
        NYT::TNode data = NYT::TNode::CreateMap();
        NYT::TNode items = NYT::TNode::CreateList();
        TVector<NYT::TNode> errors;
        // error type counter
        THashMap<TString, size_t> errorTypesCount;

        // Only emitted when feedStatsProcessed is true (i.e. after being assigned),
        // but initialized anyway so they never hold indeterminate values.
        i64 lastAccess = 0;
        i64 timestamp = 0;
        bool feedStatsProcessed = false;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode& row = input->GetRow();
            const bool postpone = NYTUtils::FromNodeOrDefault<TString>(row[FIELD_ACTION], "") == ACTION_POSTPONE;
            const TString itemUrl = NYTUtils::FromNodeOrDefault<TString>(row[FIELD_DOCUMENT], "");
            if (itemUrl.empty() && !postpone) {
                // total feed info
                data(FIELD_HASH, row[FIELD_HASH]);
                lastAccess = NYTUtils::FromNodeOrDefault<i64>(row[FIELD_LAST_ACCESS], 0);
                timestamp = row[FIELD_TIMESTAMP].AsInt64();
                CopyNodeIfExists(row, data, FIELD_STATS);
                feedStatsProcessed = true;
            }
            // skip rows until a valid feed stat is found
            if (SearchValidFeedStats && !feedStatsProcessed) {
                continue;
            }
            const TString saasKey = NYTUtils::FromNodeOrDefault<TString>(row[FIELD_SAAS_KEY], "");
            if (!itemUrl.empty() && items.Size() < MaxItems && saasKey != "") {
                // saving only items with preview
                NYT::TNode item;
                item(FIELD_LINK, itemUrl);
                item(FIELD_PREVIEW, saasKey);
                item(FIELD_HASH, row[FIELD_HASH]);
                CopyNodeIfExists(row, item, FIELD_TITLE);
                items.Add(item);
            }
            // postpone rows carry their problems in "events" keyed by the plain code field
            ConvertErrors(row[postpone ? FIELD_EVENTS : FIELD_ERRORS], postpone ? FIELD_CODE : FIELD_CODE_PUBLIC,
                          itemUrl, errors, errorTypesCount);
        }
        // sorting errors by (code, line, column)
        std::sort(errors.begin(), errors.end(), [](const NYT::TNode &a, const NYT::TNode &b) {
            const TString &code1 = a[FIELD_CODE].AsString();
            const TString &code2 = b[FIELD_CODE].AsString();
            if (code1 != code2) {
                return code1 < code2;
            }
            const i64 line1 = NYTUtils::GetNodeFieldOrDefault<i64>(a[FIELD_DETAILS], FIELD_LINE, 0);
            const i64 line2 = NYTUtils::GetNodeFieldOrDefault<i64>(b[FIELD_DETAILS], FIELD_LINE, 0);
            if (line1 != line2) {
                return line1 < line2;
            }
            const i64 column1 = NYTUtils::GetNodeFieldOrDefault<i64>(a[FIELD_DETAILS], FIELD_COLUMN, 0);
            const i64 column2 = NYTUtils::GetNodeFieldOrDefault<i64>(b[FIELD_DETAILS], FIELD_COLUMN, 0);
            return column1 < column2;
        });

        // not saving empty stats
        if (feedStatsProcessed) {
            data(FIELD_ITEMS, items);
            data(FIELD_ERRORS, NYT::TNode::CreateList(errors));

            output->AddRow(NYT::TNode()
                                   (FIELD_FEED, feed)
                                   (FIELD_HOST, host)
                                   (FIELD_TYPE, type)
                                   (FIELD_DATA, data)
                                   (FIELD_LAST_ACCESS, lastAccess)
                                   (FIELD_TIMESTAMP, timestamp)
            );
        }
    }

    /**
     * Converts a raw list of errors (or events) into normalized error nodes and
     * appends them to `errors`.
     *
     * Each produced node has a code, a severity ("error" when absent) and a
     * details map with the item url (if any), line / column / context and the
     * error-specific properties listed in PROPS_TO_COPY_BY_ERROR.
     *
     * @param errorNode       raw node; anything that is not a list is ignored
     * @param codeField       name of the field inside a raw error that holds its code
     * @param item            url of the item the errors belong to (may be empty)
     * @param errors          output list of converted errors
     * @param errorTypesCount per-code counter of already stored samples; once a
     *                        code reaches MaxErrors further samples are dropped
     */
    void ConvertErrors(const NYT::TNode &errorNode, const TString& codeField, const TString &item, TVector<NYT::TNode> &errors,
                       THashMap<TString, size_t>& errorTypesCount) {
        // error code -> names of the detail properties that are copied verbatim
        const static THashMap<TString, TVector<TString>> PROPS_TO_COPY_BY_ERROR {
                { "Banned.Document", {"timestamp"} },
                { "Duplicate.Document", {"feeds"} },
                { "Duplicate.Feed", {"sources"} },
                { "Email.BadDomain", {"email"} },
                { "Email.BadDomain.Invalid", {"email"} },
                { "Email.BadDomain.NotAllowed", {"email"} },
                { "Parser.Ad.BadId", {"type", "id", "params"} },
                { "Parser.Ad.Duplicate", {"type"} },
                { "Parser.Ad.UnknownType", {"type"} },
                { "Parser.BadFormat", {"tag"} },
                { "Parser.BadFormat.BrokenUtf", {"tag"} },
                { "Parser.BadFormat.DuplicateTag", {"tag"} },
                { "Parser.BadFormat.MissingTag", {"tag"} },
                { "Parser.BadFormat.NoText", {"tag"} },
                { "Parser.BadFormat.TextTooLong", {"tag", "size", "max_size"} },
                { "Parser.BadFormat.UnexpectedTag", {"tag"} },
                { "Parser.BadLogo", {"url", "logo"} },
                { "Parser.DuplicateUrls", {"url"} },
                { "Parser.Http3xx", {"http_code"} },
                { "Parser.Http4xx", {"http_code"} },
                { "Parser.Http5xx", {"http_code"} },
                { "Parser.Goal.BadId", {"id"} },
                { "Parser.Goal.Duplicate", {"id", "type"} },
                { "Parser.Goal.UnknownType", {"type"} },
                { "Parser.Item.BadEnclosure", {"url", "attribute"} },
                { "Parser.Item.BadPdaUrl", {"url"} },
                { "Parser.Item.BadRelated", {"url", "attribute"} },
                { "Parser.Item.ExternalLink", {"url"} },
                { "Parser.Item.MoreThanOneInpageAdBlock", {"platform", "inpageAdCount"} },
                { "Parser.Item.TurboContent.BadSocial", {"info"} },
                { "Parser.Item.TurboContent.ContradictingAttributes", {"info"} },
                { "Parser.Item.TurboContent.CustomHtmlInvalidClassAttr", {"info"} },
                { "Parser.Item.TurboContent.CustomHtmlInvalidIdAttr", {"info"} },
                { "Parser.Item.TurboContent.DuplicateElementId", {"info"} },
                { "Parser.Item.TurboContent.EmptyBlockInDynamicForm", {"info"} },
                { "Parser.Item.TurboContent.InvalidBlockTypeInDynamicForm", {"info"} },
                { "Parser.Item.TurboContent.InvalidHtmlClosingTag", {"info"} },
                { "Parser.Item.TurboContent.InvalidLineTypeInDynamicForm", {"info"} },
                { "Parser.Item.TurboContent.InvalidValue", {"info"} },
                { "Parser.Item.TurboContent.InvalidUrl", {"url"} },
                { "Parser.Item.TurboContent.InvalidUrlFragment", {"info"} },
                { "Parser.Item.TurboContent.MultipleBlocksOfTheSameTypeInDynamicForm", {"info"} },
                { "Parser.Item.TurboContent.NoItemsInWidgetFeedback", {"info"} },
                { "Parser.Item.TurboContent.NotAllGroupAttributesSpecified", {"info"} },
                { "Parser.Item.TurboContent.NoRequiredAttr", {"info"} },
                { "Parser.Item.TurboContent.SelfConstructedTurboUrl", {"info"} },
                { "Parser.Item.TurboContent.TooFewOptionsInDynamicForm", {"info"} },
                { "Parser.Item.TurboContent.UnknownCarouselItemType", {"info"} },
                { "Parser.Item.TurboContent.UnknownIcon", {"info"} },
                { "Parser.Item.TurboContent.UnknownWidgetFeedbackType", {"info"} },
                { "Parser.Tracker.BadId", {"type", "id", "params"} },
                { "Parser.Tracker.Duplicate", {"type", "id", "params"} },
                { "Parser.Tracker.UnknownType", {"type"} },
                { "Parser.XmlError", {"message"} },
                { "Document.QuotaExceeded.Images", {"current", "quota"} },
                { "Fetch.QuotaExceeded.Images", {"current", "quota", "items"} },
                { "Fetch.QuotaExceeded.Items", {"current", "quota"} },
                { "Images.FetchError", {"url", "http_code"} },
                { "Images.Unavailable", {"url", "http_code"} },
                { "Images.StillWaiting", {"url", "http_code"} },
                { "Skip.Images", {"url", "http_code"} },
                { "Parser.Item.TooManyRelatedItems", {"count", "max_count"} },
        };

        if (!errorNode.IsList()) {
            return;
        }
        const NYT::TNode::TListType& rawErrors = errorNode.AsList();
        for (const NYT::TNode& rawError : rawErrors) {
            const TString code = NYTUtils::FromNodeOrDefault<TString>(rawError[codeField], ERROR_UNKNOWN);
            // not saving too many error samples; one lookup serves both the check and the increment
            size_t &storedSamples = errorTypesCount[code];
            if (storedSamples >= MaxErrors) {
                continue;
            }
            ++storedSamples;

            NYT::TNode error;
            error(FIELD_CODE, code);
            error(FIELD_SEVERITY, NYTUtils::FromNodeOrDefault<TString>(rawError[FIELD_SEVERITY], "error"));

            NYT::TNode details = NYT::TNode::CreateMap();
            // at least write item url
            if (!item.empty()) {
                details(FIELD_ITEM_URL, item);
            }

            // read-only view: the raw details are only inspected, so no deep copy is needed
            const NYT::TNode &rawDetails = rawError[FIELD_DETAILS];
            if (rawDetails.IsMap()) {
                CopyNodeIfExists(rawDetails, details, FIELD_LINE);
                CopyNodeIfExists(rawDetails, details, FIELD_COLUMN);
                CopyNodeIfExists(rawDetails, details, FIELD_CONTEXT);
                // custom errors
                if (code == "Images.FetchError" && rawDetails[FIELD_ERRORS].IsMap()) {
                    // the keys of the nested errors map are stored as the list of urls
                    const NYT::TNode::TMapType &imageErrors = rawDetails[FIELD_ERRORS].AsMap();
                    NYT::TNode urls = NYT::TNode::CreateList();
                    for (const auto &pair : imageErrors) {
                        urls.Add(pair.first);
                    }
                    details(FIELD_URL, urls);
                } else {
                    const auto propsIt = PROPS_TO_COPY_BY_ERROR.find(code);
                    if (propsIt != PROPS_TO_COPY_BY_ERROR.end()) {
                        for (const auto &prop : propsIt->second) {
                            CopyNodeIfExists(rawDetails, details, prop);
                        }
                    }
                }
            }
            error(FIELD_DETAILS, details);
            errors.push_back(error);
        }
    }

    /**
     * Copies src[propName] into dst under the same key when src has that key;
     * otherwise leaves dst untouched.
     */
    void CopyNodeIfExists(const NYT::TNode &src, NYT::TNode &dst, const TString &propName) {
        if (src.HasKey(propName)) {
            dst(propName, src[propName]);
        }
    }

public:
    size_t MaxItems = 0;
    size_t MaxErrors = 0;
    bool SearchValidFeedStats = false;
}; // TTurboLogsReducer

REGISTER_REDUCER(TTurboLogsReducer)

/**
 * Reducer that compresses the feed history of one reduce key and extracts its
 * last state.
 *
 * Rows of input table 1 are written to the history output only when
 * MinTimestamp is positive and the row is newer than MinTimestamp. Rows of the
 * other input are written to the history output when they are the first row of
 * the key or when their stats or errors differ from the previously seen row.
 * The last row seen (from either input) is written to the last-state output.
 */
struct TTurboFeedsHistoryReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(MinTimestamp)

public:
    TTurboFeedsHistoryReducer() = default;

    /**
     * @param minTimestamp rows of the history input with a timestamp not greater
     *                     than this are dropped; 0 drops all rows of that input
     */
    TTurboFeedsHistoryReducer(time_t minTimestamp)
        : MinTimestamp(minTimestamp)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        static const int INPUT_HISTORY = 1;
        static const int OUTPUT_HISTORY = 0;
        static const int OUTPUT_LAST_STATE = 1;

        NYT::TNode previous;
        while (input->IsValid()) {
            const NYT::TNode current = input->GetRow();

            bool keepInHistory;
            if (input->GetTableIndex() == INPUT_HISTORY) {
                // existing history is passed through without comparing, subject to the age cut-off
                keepInHistory = MinTimestamp > 0 && current[FIELD_TIMESTAMP].AsInt64() > MinTimestamp;
            } else if (!previous.HasValue()) {
                // first row of the key is always kept
                keepInHistory = true;
            } else {
                // keep only rows that actually changed the feed state
                keepInHistory = previous[FIELD_DATA][FIELD_STATS] != current[FIELD_DATA][FIELD_STATS] ||
                                previous[FIELD_DATA][FIELD_ERRORS] != current[FIELD_DATA][FIELD_ERRORS];
            }

            if (keepInHistory) {
                output->AddRow(current, OUTPUT_HISTORY);
            }
            previous = current;
            input->Next();
        }

        if (previous.HasValue()) {
            output->AddRow(previous, OUTPUT_LAST_STATE);
        }
    }

    time_t MinTimestamp;
};
REGISTER_REDUCER(TTurboFeedsHistoryReducer)

/**
 * Mapper that expands one config row into one row per non-deleted feed.
 *
 * The "Feeds" column is parsed as a JSON list; for every feed whose state is
 * not "DELETED" a row (domain, feed url, active flag) is emitted, where the
 * domain is the "Host" column with the scheme removed and the active flag is
 * true only for the state "ACTIVE".
 */
struct TExtractFeedsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TExtractFeedsMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            NYT::TNode configRow = input->GetRow();
            // NOTE(review): if RemoveScheme returns an owning string by value, this view
            // would outlive it - confirm the helper returns a view into the row's host.
            const TStringBuf &domain = NUtils::RemoveScheme(configRow["Host"].AsString());
            NYT::TNode feedList = NYT::NodeFromJsonString(configRow["Feeds"].AsString());

            for (const NYT::TNode &feed : feedList.AsList()) {
                const TString &state = feed[FIELD_STATE].AsString();
                if (state != "DELETED") {
                    NYT::TNode feedRow;
                    feedRow(FIELD_DOMAIN, domain);
                    feedRow(FIELD_FEED, feed[FIELD_URL]);
                    feedRow(FIELD_ACTIVE, state == "ACTIVE");
                    output->AddRow(feedRow);
                }
            }
            input->Next();
        }
    }
}; // TExtractFeedsMapper
REGISTER_MAPPER(TExtractFeedsMapper)

/**
 * Reducer that collapses all rows of one reduce key into a single row holding
 * the feed, the active flag of the first row and the list of domains of all rows.
 */
struct TFeedsToDomainsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TFeedsToDomainsReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        // feed and active flag are taken from the first row of the group
        NYT::TNode result = NYT::TNode::CreateMap();
        result(FIELD_FEED, input->GetRow()[FIELD_FEED]);
        result(FIELD_ACTIVE, input->GetRow()[FIELD_ACTIVE]);

        NYT::TNode domainList = NYT::TNode::CreateList();
        while (input->IsValid()) {
            domainList.Add(input->GetRow()[FIELD_DOMAIN]);
            input->Next();
        }
        result(FIELD_DOMAINS, domainList);

        output->AddRow(result);
    }
}; // TFeedsToDomainsReducer
REGISTER_REDUCER(TFeedsToDomainsReducer)

/**
 * Reducer that joins the last state of a feed (input table 0) with the
 * domains-by-feed row (any other input) and emits one copy of the state per
 * domain, with the domain and the active flag filled in from the domains row.
 * Nothing is emitted unless both sides are present for the key.
 */
struct TFeedsStatePropagatorReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TFeedsStatePropagatorReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        const int INPUT_FEEDS_STATE = 0;
        NYT::TNode lastState;
        NYT::TNode domainsRow;

        // when several rows come from the same input, the last one wins
        while (input->IsValid()) {
            NYT::TNode &slot = (input->GetTableIndex() == INPUT_FEEDS_STATE) ? lastState : domainsRow;
            slot = input->GetRow();
            input->Next();
        }

        if (!domainsRow.HasValue() || !lastState.HasValue()) {
            return;
        }

        for (const NYT::TNode &domain : domainsRow[FIELD_DOMAINS].AsList()) {
            lastState[FIELD_DOMAIN] = domain;
            lastState[FIELD_ACTIVE] = domainsRow[FIELD_ACTIVE];
            output->AddRow(lastState);
        }
    }
}; // TFeedsStatePropagatorReducer
REGISTER_REDUCER(TFeedsStatePropagatorReducer)

/**
 * Appends to `inputTables` every non-empty table under `dir` whose path ends
 * with "/<digits>" and whose table name is greater than the name stored in the
 * `progressAttr` attribute of the destination root (an unreadable attribute is
 * treated as an empty name, i.e. every matching table qualifies).
 *
 * For each appended table, `source` is registered in `sourceMap` under the next
 * free index (the current size of the map).
 *
 * @return the greatest table name among the appended tables, or an empty
 *         string when nothing was appended
 */
TString FillInputTables(const NYT::ITransactionPtr tx, const TString &dir, const TString &progressAttr,
                    TVector<NYT::TRichYPath> &inputTables, TMap<size_t, TString> &sourceMap, const TString &source) {
    const auto& config = TConfig::CInstance();

    TRegularExpression numericTablePattern("/(\\d+)$");
    TVector<TString> matches;
    TDeque<NYTUtils::TTableInfo> candidates;
    NYTUtils::GetTableList(tx, dir, candidates);

    // read the processing progress; a missing or unreadable attribute means "nothing processed yet"
    TString previousName;
    try {
        previousName = NYTUtils::GetAttr(tx, config.TABLE_DESTINATION_ROOT, progressAttr).AsString();
    } catch (yexception &) {
        previousName.clear();
    }
    LOG_INFO("Last processed source for %s is %s", dir.c_str(), previousName.c_str());

    TString newestName;
    for (const auto& candidate : candidates) {
        if (!numericTablePattern.GetMatches(candidate.Name, matches) || !(candidate.RecordCount > 0)) {
            continue;
        }
        const TString &shortName = NYTUtils::GetTableName(candidate.Name);
        if (!(shortName > previousName)) {
            continue;
        }
        inputTables.push_back(candidate.Name);
        // the map index matches the position the table gets among the job inputs
        const size_t nextIndex = sourceMap.size();
        sourceMap[nextIndex] = source;
        if (!(shortName < newestName)) {
            newestName = shortName;
        }
    }
    return newestName;
}

int ProcessTurboLogs(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();
    LOG_INFO("Processing turbo logs");

    TString newLastProcessedName;
    TString newLastProcessedFastName;
    TString newLastProcessedYmlName;
    TMap<size_t, TString> sourceMap;
    TVector<NYT::TRichYPath> inputTables;
    newLastProcessedName = FillInputTables(tx, config.TABLE_SOURCE_LOGS_ROOT, LAST_PROCESSED_SOURCE_ATTR, inputTables, sourceMap, SOURCE_RSS);
    newLastProcessedFastName = FillInputTables(tx, config.TABLE_SOURCE_FAST_LOGS_ROOT, LAST_PROCESSED_FAST_SOURCE_ATTR, inputTables, sourceMap, SOURCE_RSS);
    newLastProcessedYmlName = FillInputTables(tx, config.TABLE_SOURCE_YML_LOGS_ROOT, LAST_PROCESSED_YML_SOURCE_ATTR, inputTables, sourceMap, SOURCE_YML);

    if (inputTables.empty()) {
        LOG_INFO("No new data. Exiting");
        tx->Commit();
        return 0;
    }

    // tables definitions
    NYT::TRichYPath domainsByFeeds(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "domains-by-feeds"));
    NYT::TRichYPath feedsHistory(config.TABLE_FEEDS_HISTORY);
    NYT::TRichYPath tempHistory(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "intm-feeds-history"));
    feedsHistory.Schema(NYT::TTableSchema()
                          .AddColumn(FIELD_FEED, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_HOST, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_TIMESTAMP, NYT::EValueType::VT_INT64)
                          .AddColumn(FIELD_DATA, NYT::EValueType::VT_ANY)
                          .AddColumn(FIELD_TYPE, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_LAST_ACCESS, NYT::EValueType::VT_INT64));
    NYT::TRichYPath feedsByDomain(config.TABLE_FEEDS_BY_DOMAIN);
    feedsByDomain.Schema(NYT::TTableSchema()
                             .AddColumn(FIELD_DOMAIN, NYT::EValueType::VT_STRING)
                             .AddColumn(FIELD_FEED, NYT::EValueType::VT_STRING)
                             .AddColumn(FIELD_HOST, NYT::EValueType::VT_STRING)
                             .AddColumn(FIELD_ACTIVE, NYT::EValueType::VT_BOOLEAN)
                             .AddColumn(FIELD_TYPE, NYT::EValueType::VT_STRING)
                             .AddColumn(FIELD_TIMESTAMP, NYT::EValueType::VT_INT64)
                             .AddColumn(FIELD_LAST_ACCESS, NYT::EValueType::VT_INT64)
                             .AddColumn(FIELD_DATA, NYT::EValueType::VT_ANY));

    // compress history and compute last state
    time_t lastCleaningTs = 0;
    time_t now = Now().Seconds();
    try {
        const NYT::TNode &lps = NYTUtils::GetAttr(tx, feedsHistory.Path_, LAST_CLEANING_ATTR);
        lastCleaningTs = lps.AsInt64();
    } catch (yexception &e) {
        LOG_INFO("Error when reading last cleaning timestamp - %s", e.what());
    }
    bool needFullHistoryProcessing = (now - lastCleaningTs) > FEED_HISTORY_CLEANING_PERIOD || !tx->Exists(feedsHistory.Path_);

    TMaybe<yexception> error;
    auto processRawLogsAndHistory = [&] {
        try {
            LOG_INFO("Converting raw turbo logs");
            // process raw turbo-logs
            NYT::TRichYPath intmLogs(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "intm-logs"));
            TMapReduceCmd <TExtractTimestampMapper, TTurboLogsReducer> mapReduceCmd(tx,
                new TExtractTimestampMapper(SOURCE_WEBMASTER, sourceMap),
                new TTurboLogsReducer(MAX_ITEMS, MAX_ERRORS, true));
            for (const NYT::TRichYPath &input : inputTables) {
                mapReduceCmd.Input<NYT::TNode>(input);
            }
            mapReduceCmd
                .Output<NYT::TNode>(intmLogs)
                .ReduceBy({FIELD_FEED, FIELD_TIMESTAMP})
                .SortBy({FIELD_FEED, FIELD_TIMESTAMP, FIELD_HASH})
                .ReducerMemoryLimit(1_GBs)
                .Do();

            TSortCmd<NYT::TNode>(tx)
                .Input<NYT::TNode>(intmLogs)
                .Output<NYT::TNode>(intmLogs)
                .By({FIELD_FEED, FIELD_TIMESTAMP})
                .Do();

            LOG_INFO("Reducing turbo feeds history");
            // compress history and compute last state
            time_t lastCleaningTs = 0;
            time_t now = Now().Seconds();
            try {
                const NYT::TNode &lps = NYTUtils::GetAttr(tx, feedsHistory.Path_, LAST_CLEANING_ATTR);
                lastCleaningTs = lps.AsInt64();
            } catch (yexception &e) {
                LOG_INFO("Error when reading last cleaning timestamp - %s", e.what());
            }
            if (needFullHistoryProcessing) {
                LOG_INFO("Reducing history with cleaning");
                TReduceCmd<TTurboFeedsHistoryReducer> reducer(tx, new TTurboFeedsHistoryReducer(now - MAX_FEED_HISTORY_AGE));
                reducer.Input<NYT::TNode>(intmLogs);
                if (tx->Exists(feedsHistory.Path_)) {
                    reducer.Input<NYT::TNode>(feedsHistory);
                }
                reducer
                    .Output<NYT::TNode>(feedsHistory)
                    .Output<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                    .ReduceBy(FIELD_FEED)
                    .SortBy({FIELD_FEED, FIELD_TIMESTAMP})
                    .Do();
                NYTUtils::SetAttr(tx, feedsHistory.Path_, LAST_CLEANING_ATTR, now);
            } else {
                LOG_INFO("Reducing history with only appending new records");
                TReduceCmd<TTurboFeedsHistoryReducer>(tx, new TTurboFeedsHistoryReducer(0))
                    .Input<NYT::TNode>(intmLogs)
                    .Input<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                    .Output<NYT::TNode>(tempHistory)
                    .Output<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                    .ReduceBy(FIELD_FEED)
                    .SortBy({FIELD_FEED, FIELD_TIMESTAMP})
                    .Do();
            }
            LOG_INFO("Reducing turbo feeds history finished successfully");
        } catch (yexception &e) {
            error = e;
        };
    };

    auto collectDomainsByFeeds = [&] {
        try {
            LOG_INFO("Collecting domains by feeds");
            TMapReduceCmd<TExtractFeedsMapper, TFeedsToDomainsReducer>(tx)
                .Input<NYT::TNode>(config.TABLE_TURBO_CONFIG)
                .Output<NYT::TNode>(domainsByFeeds)
                .ReduceBy(FIELD_FEED)
                .Do();

            TSortCmd<NYT::TNode>(tx)
                .Input<NYT::TNode>(domainsByFeeds.Path_)
                .Output<NYT::TNode>(domainsByFeeds.Path_)
                .By(FIELD_FEED)
                .Do();
            LOG_INFO("Collecting domains by feeds finished successfully");
        } catch (yexception &e) {
            error = e;
        };
    };

    NUtils::RunAsync(processRawLogsAndHistory, collectDomainsByFeeds);
    if (error.Defined()) {
        throw error.Get();
    }

    // working in parallel
    auto sortHistoryTable = [&] {
        try {
            if (needFullHistoryProcessing) {
                LOG_INFO("Sorting feeds history");
                TSortCmd<NYT::TNode>(tx)
                    .Input<NYT::TNode>(feedsHistory.Path_)
                    .Output<NYT::TNode>(feedsHistory.Path_)
                    .By({FIELD_FEED, FIELD_TIMESTAMP})
                    .Do();
            } else {
                LOG_INFO("Merging feeds history");
                TSortCmd<NYT::TNode>(tx)
                    .Input<NYT::TNode>(tempHistory.Path_)
                    .Output<NYT::TNode>(tempHistory.Path_)
                    .By({FIELD_FEED, FIELD_TIMESTAMP})
                    .Do();
                // merge with main history
                TMergeCmd<NYT::TNode>(tx)
                    .Input<NYT::TNode>(feedsHistory)
                    .Input<NYT::TNode>(tempHistory.Path_)
                    .Sorted()
                    .Output<NYT::TNode>(feedsHistory)
                    .MergeBy({FIELD_FEED, FIELD_TIMESTAMP})
                    .Do();
            }
        } catch (yexception &e) {
            error = e;
        };
    };
    auto propagateFeedStateToDomains = [&] {
        try {
            LOG_INFO("Propagating feeds last state");
            TSortCmd<NYT::TNode>(tx)
                .Input<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                .Output<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                .By({FIELD_FEED, FIELD_TIMESTAMP})
                .Do();
            LOG_INFO("Propagating feeds statuses to domains");
            TReduceCmd<TFeedsStatePropagatorReducer>(tx)
                .Input<NYT::TNode>(config.TABLE_FEEDS_LAST_STATE)
                .Input<NYT::TNode>(domainsByFeeds)
                .Output<NYT::TNode>(feedsByDomain)
                .ReduceBy(FIELD_FEED)
                .Do();

            TSortCmd<NYT::TNode>(tx)
                .Input<NYT::TNode>(feedsByDomain.Path_)
                .Output<NYT::TNode>(feedsByDomain.Path_)
                .By(FIELD_DOMAIN)
                .Do();
            LOG_INFO("Propagating feeds last state finished successfully");
        } catch (yexception &e) {
            error = e;
        };
    };
    NUtils::RunAsync(sortHistoryTable, propagateFeedStateToDomains);
    if (error.Defined()) {
        throw error.Get();
    }

    if (!newLastProcessedName.empty()) {
        NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_SOURCE_ATTR,
                newLastProcessedName);
    }
    if (!newLastProcessedFastName.empty()) {
        NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_FAST_SOURCE_ATTR,
                newLastProcessedFastName);
    }
    if (!newLastProcessedYmlName.empty()) {
        NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_YML_SOURCE_ATTR,
                newLastProcessedYmlName);
    }

    LOG_INFO("Finished successfully");
    tx->Commit();
    return 0;
}

// API logs
int ProcessTurboApiLogs(const TString &mrServerHost, const TString &sourceLogs, const TString &destination) {
    NYT::IClientPtr client = NYT::CreateClient(mrServerHost);
    NYT::ITransactionPtr tx = client->StartTransaction();
    LOG_INFO("About to process turbo api logs on cluster %s at path %s", mrServerHost.c_str(), sourceLogs.c_str());
    // last non-empty table
    TRegularExpression regex("/(.+)\\.logs$");
    TVector<TString> hits;
    TDeque<NYTUtils::TTableInfo> sourceTables;
    NYTUtils::GetTableList(tx, sourceLogs, sourceTables);
    // collect destination table names
    TVector<TString> inputTables;
    TString lastProcessedTable = "0", newLastProcessedTable;
    // check for last processed source
    TMap<size_t, TString> sourceMap;
    try {
        const NYT::TNode &lps = NYTUtils::GetAttr(tx, destination, LAST_PROCESSED_SOURCE_ATTR);
        lastProcessedTable = lps.AsString();
    } catch (yexception &ignored) {
    }
    time_t lastTimestamp = FromString<time_t>(lastProcessedTable);
    time_t newlastTimestamp = FromString<time_t>(lastProcessedTable);
    for (const auto& tableInfo : sourceTables) {
        if (regex.GetMatches(tableInfo.Name, hits) && tableInfo.RecordCount > 0) {
            if (tableInfo.Time > lastTimestamp) {
                inputTables.push_back(tableInfo.Name);
                sourceMap[sourceMap.size()] = "api";
                newlastTimestamp = std::max(newlastTimestamp, tableInfo.Time);
            }
        }
    }
    newLastProcessedTable = ToString<i64>(newlastTimestamp);
    if (inputTables.empty()) {
        LOG_INFO("No new data on cluster %s at path %s. Exiting", mrServerHost.c_str(), sourceLogs.c_str());
        tx->Commit();
        return 0;
    }

    LOG_INFO("Processing %lu tables on cluster %s at path %s", inputTables.size(), mrServerHost.c_str(), sourceLogs.c_str());
    NYT::TRichYPath output(NYTUtils::JoinPath(destination, newLastProcessedTable));
    output.Schema(NYT::TTableSchema()
                          .AddColumn(FIELD_FEED, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_HOST, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_TYPE, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_TIMESTAMP, NYT::EValueType::VT_INT64)
                          .AddColumn(FIELD_LAST_ACCESS, NYT::EValueType::VT_INT64)
                          .AddColumn(FIELD_DATA, NYT::EValueType::VT_ANY));

    TMapReduceCmd<TExtractTimestampMapper, TTurboLogsReducer> mapReduceCmd(tx,
        new TExtractTimestampMapper(SOURCE_API, sourceMap), new TTurboLogsReducer(UNLIMITED_MAX, UNLIMITED_MAX, false));
    for (const auto &input : inputTables) {
        mapReduceCmd.Input<NYT::TNode>(input);
    }
    mapReduceCmd
        .Output<NYT::TNode>(output)
        .ReduceBy({FIELD_FEED, FIELD_TIMESTAMP})
        .SortBy({FIELD_FEED, FIELD_TIMESTAMP, FIELD_HASH})
        .ReducerMemoryLimit(1_GBs)
        .Do();

    TSortCmd<NYT::TNode>(tx)
        .Input<NYT::TNode>(output.Path_)
        .Output<NYT::TNode>(output.Path_)
        .By({FIELD_FEED, FIELD_TIMESTAMP})
        .Do();

    if (!newLastProcessedTable.empty()) {
        NYTUtils::SetAttr(tx, destination, LAST_PROCESSED_SOURCE_ATTR, newLastProcessedTable);
    }
    LOG_INFO("Completed successfully on cluster %s at path %s", mrServerHost.c_str(), sourceLogs.c_str());

    tx->Commit();
    return 0;
}

int ProcessTurboApiLogs(int , const char **) {
    const TConfig &config = TConfig::CInstance();
    TMaybe<yexception> error;
    auto processApiLogsHahn = [&] {
        try {
            ProcessTurboApiLogs(config.MR_TURBO_SERVER_HOST, config.TABLE_SOURCE_API_LOGS, config.TABLE_DESTINATION_API_LOGS_ROOT);
        } catch (yexception &e) {
            error = e;
        };
    };
    auto processApiLogsArnold = [&] {
        try {
            ProcessTurboApiLogs(config.MR_TURBO_BACKUP_SERVER_HOST, config.TABLE_SOURCE_API_LOGS, config.TABLE_DESTINATION_API_LOGS_ROOT);
        } catch (yexception &e) {
            error = e;
        };
    };
    auto processApiDebugLogsHahn = [&] {
        try {
            ProcessTurboApiLogs(config.MR_TURBO_SERVER_HOST, config.TABLE_SOURCE_API_DEBUG_LOGS, config.TABLE_DESTINATION_API_DEBUG_LOGS_ROOT);
        } catch (yexception &e) {
            error = e;
        };
    };
    auto processApiDebugLogsArnold = [&] {
        try {
            ProcessTurboApiLogs(config.MR_TURBO_BACKUP_SERVER_HOST, config.TABLE_SOURCE_API_DEBUG_LOGS, config.TABLE_DESTINATION_API_DEBUG_LOGS_ROOT);
        } catch (yexception &e) {
            error = e;
        };
    };
    NUtils::RunAsync(processApiLogsHahn, processApiLogsArnold, processApiDebugLogsHahn, processApiDebugLogsArnold);
    if (error.Defined()) {
        throw error.Get();
    }
    return 0;
}

} //namespace NTurbo
} //namespace NWebmaster

