#include <util/generic/size_literals.h>
#include <util/stream/zlib.h>

#include <library/cpp/yson/node/node_io.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>
#include <robot/jupiter/protos/acceptance.pb.h>

#include <wmconsole/version3/processors/feeds/feeds.pb.h>
#include <wmconsole/version3/wmcutil/compress.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/url.h>

#include "config.h"
#include "process_offer_logs.h"
#include "field_names.h"

using namespace NJupiter;

namespace NWebmaster {
namespace NFeeds {

// Names of the progress attributes passed to FillInputTables: each one stores
// the name of the last processed source table for one log source.
const char* LAST_PROCESSED_SOURCE_ATTR = "lastProcessedSource";
const char* LAST_PROCESSED_TURBO_SOURCE_ATTR = "lastProcessedTurboSource";
const char* LAST_PROCESSED_BY_FEED_SOURCE_ATTR = "lastProcessedByFeedSource";
const char* LAST_PROCESSED_TOTAL_SOURCE_ATTR = "lastProcessedTotalSource";

// Input tags for the tagged reducers.
// NOTE(review): the constructor argument is presumably the input table index — confirm against tags.h.
TInputTag<NProto::TOfferLogRecord>           RawLogsInputTag      (0);
TInputTag<NProto::TFeedHistoryRecord>        LastStateInputTag    (1);
TInputTag<NProto::TFeedSerpdataRecord>       IntmSerpdataInputTag (2);
TInputTag<NProto::TSetWithFeedRecord>        SetWithFeedInputTag  (3);
TInputTag<NProto::TSetWithFeedRecord>        NativeFeedsInputTag  (4);
TInputTag<NProto::TSetWithFeedRecord>        OfferBaseSetsInputTag(5);

// Output tags for the tagged reducers.
// NOTE(review): SetsWithFeedsOutputTag uses 4 while the other output tags run 0..2 — confirm this is intentional.
TOutputTag<NProto::TFeedHistoryRecord>       HistoryOutputTag     (0);
TOutputTag<NProto::TFeedHistoryRecord>       LastStateOutputTag   (1);
TOutputTag<NProto::TFeedSerpdataRecord>      IntmSerpdataOutputTag(2);
TOutputTag<NProto::TSetWithFeedRecord>       SetsWithFeedsOutputTag(4);

// Attribute and table names; none of them is referenced in the part of the file reviewed here.
const char* LAST_CLEANING_ATTR = "lastCleaning";
const char* LATEST_MERGED_TABLE_ATTR = "latest_merged_table";
const char* HISTORY_TABLE = "history";
const char* TURBO_TIMESTAMP_ATTR = "turbo_timestamp";

// Value of TOfferLogRecord's action field that TOfferLogsReducer treats as "postponed".
const char* ACTION_POSTPONE = "postpone";

const char* ERROR_UNKNOWN = "Unknown";
// Shared default returned for errors without a "details" field (see TOfferLogsReducer).
const NYT::TNode EMPTY_DETAILS = NYT::TNode::CreateMap();

// Maximum number of sample errors stored per error code in a history record.
const size_t MAX_ERRORS_BY_TYPE = 5;
//const size_t MAX_LAST_STATES = 10;

// Produces a quoted CSV cell from a UTF-8 string: every embedded double quote
// is prefixed with a backslash and the whole value is wrapped in double quotes.
static TString EscapeText(const TString &s) {
    static const TWtringBuf QUOTE(u"\"");
    static const TWtringBuf ESCAPED_QUOTE(u"\\\"");

    // the substitution is done on the UTF-16 representation of the text
    TUtf16String escaped = UTF8ToWide(s);
    SubstGlobal(escaped, QUOTE, ESCAPED_QUOTE);

    TString cell = "\"";
    cell += WideToUTF8(escaped);
    cell += "\"";
    return cell;
}

// Mapper over raw offer log rows: fills the Timestamp field from the numeric
// prefix of the row hash and makes sure WmcHost is populated.
struct TExtractTimestampMapper : public NYT::IMapper<NYT::TTableReader<NProto::TOfferLogRecord>, NYT::TTableWriter<NProto::TOfferLogRecord>> {
public:
    TExtractTimestampMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            NProto::TOfferLogRecord record = input->GetRow();

            // the timestamp is everything in the hash before the first '#'
            // (the whole hash when there is no '#')
            const TString &hash = record.GetHash();
            const size_t separatorPos = hash.find('#');
            record.SetTimestamp(FromString<i64>(hash.substr(0, separatorPos)));

            // rows without an explicit host get one derived from the feed url
            if (record.GetWmcHost().empty()) {
                record.SetWmcHost(NUtils::FixDomainPrefix(NUtils::GetHostWithoutScheme(record.GetFeed())));
            }

            output->AddRow(record);
            input->Next();
        }
    }

};
REGISTER_MAPPER(TExtractTimestampMapper)

// Mapper over serpdata rows: stamps every row with the timestamp registered for
// its input table (when there is one) and guarantees that FeedUrl is set.
struct TExtractSerpdataTimestampMapper : public NYT::IMapper<NYT::TTableReader<NProto::TFeedSerpdataRecord>, NYT::TTableWriter<NProto::TFeedSerpdataRecord>> {
    Y_SAVELOAD_JOB(TableTimestamps)
public:
    TExtractSerpdataTimestampMapper() = default;
    // tableTimestamps maps an input table index to the timestamp assigned to its rows
    TExtractSerpdataTimestampMapper(const THashMap<size_t, ui64> &tableTimestamps) : TableTimestamps(tableTimestamps) {
    }

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            NProto::TFeedSerpdataRecord record = input->GetRow();

            const size_t tableIndex = input->GetTableIndex();
            const auto timestampIt = TableTimestamps.find(tableIndex);
            if (timestampIt != TableTimestamps.end()) {
                record.SetTimestamp(timestampIt->second);
            }

            // make the field explicitly present even when the source row has no feed url
            if (!record.HasFeedUrl()) {
                record.SetFeedUrl("");
            }

            output->AddRow(record);
        }
    }
private:
    THashMap<size_t, ui64> TableTimestamps;
};
REGISTER_MAPPER(TExtractSerpdataTimestampMapper)

// Reducer that keeps a single row per reduce key: only the first row of the
// group is written, the remaining rows are never read.
struct TRemoveDuplicatesReducer : public NYT::IReducer<NYT::TTableReader<NProto::TSetWithFeedRecord>, NYT::TTableWriter<NProto::TSetWithFeedRecord>> {
public:
    TRemoveDuplicatesReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        const NProto::TSetWithFeedRecord &firstOfGroup = input->GetRow();
        output->AddRow(firstOfGroup);
    }
};
REGISTER_REDUCER(TRemoveDuplicatesReducer)

// Mapper over offer base rows: parses the YSON list stored in Sets and writes
// one (feed, set url) record per list element. Rows with an empty Feed or an
// empty Sets field produce nothing.
struct TExtractSetUrlsMapper : public NYT::IMapper<NYT::TTableReader<NProto::TOfferBaseRecord>, NYT::TTableWriter<NProto::TSetWithFeedRecord>> {
public:
    TExtractSetUrlsMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NProto::TOfferBaseRecord offer = input->GetRow();
            const bool hasFeed = !offer.GetFeed().empty();
            const bool hasSets = !offer.GetSets().empty();
            if (hasFeed && hasSets) {
                // every element of the list is expected to carry a string "url" field
                const NYT::TNode parsedSets = NYT::NodeFromYsonString(offer.GetSets());
                for (const NYT::TNode &entry : parsedSets.AsList()) {
                    NProto::TSetWithFeedRecord setWithFeed;
                    setWithFeed.SetFeed(offer.GetFeed());
                    setWithFeed.SetSet(entry["url"].AsString());
                    output->AddRow(setWithFeed);
                }
            }
        }
    }
};
REGISTER_MAPPER(TExtractSetUrlsMapper)

struct TJoinOfferBaseSetsWithFeeds : public TTaggedReducer {
public:
    TJoinOfferBaseSetsWithFeeds() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        /*auto feedHosts = reader.GetRows(NativeFeedsInputTag);
        for (NProto::TSetWithFeedRecord set : reader.GetRows(OfferBaseSetsInputTag)) {
            for (const NProto::TSetWithFeedRecord& feedHost : feedHosts) {
                set.SetHost(feedHost.GetHost());
                writer.AddRow(set, SetsWithFeedsOutputTag);
            }
        }*/

        TVector<NProto::TSetWithFeedRecord> obRecords;
        TVector<TString> hosts;
        for (; reader.IsValid(); reader.Next()) {
            if (reader.IsCurrentTable(OfferBaseSetsInputTag)) {
                obRecords.emplace_back(reader.GetRow(OfferBaseSetsInputTag));
            } else {
                hosts.emplace_back(reader.GetRow(NativeFeedsInputTag).GetHost());
            }
        }
        for (NProto::TSetWithFeedRecord obRecord : obRecords) {
            for (const TString &host : hosts) {
                obRecord.SetHost(host);
                writer.AddRow(obRecord, SetsWithFeedsOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TJoinOfferBaseSetsWithFeeds)

// Mapper that turns feed history rows into error-free serpdata records.
// A row with an empty Feed is looked up in TotalThemes, any other row in
// FeedThemes; the lookup key is the row Type. When the type is present, a
// record with the mapped timestamp is written, otherwise the row is dropped.
struct TGenerateFakeRecordsMapper : public NYT::IMapper<NYT::TTableReader<NProto::TFeedHistoryRecord>, NYT::TTableWriter<NProto::TFeedSerpdataRecord>> {
    Y_SAVELOAD_JOB(TotalThemes, FeedThemes)
public:
    TGenerateFakeRecordsMapper() = default;
    TGenerateFakeRecordsMapper(THashMap<TString, ui64> totalThemes, THashMap<TString, ui64> feedThemes) :
        TotalThemes(totalThemes), FeedThemes(feedThemes) {
    }

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NProto::TFeedHistoryRecord state = input->GetRow();

            const THashMap<TString, ui64> &themes = state.GetFeed().empty() ? TotalThemes : FeedThemes;
            const auto themeIt = themes.find(state.GetType());
            if (themeIt == themes.end()) {
                continue;
            }

            NProto::TFeedSerpdataRecord fake;
            fake.SetHost(state.GetHost());
            // for "total" rows the feed is empty, so this stores an empty feed url
            fake.SetFeedUrl(state.GetFeed());
            fake.SetTheme(state.GetType());
            fake.SetTimestamp(themeIt->second);
            output->AddRow(fake);
        }
    }
private:
    THashMap<TString, ui64> TotalThemes;
    THashMap<TString, ui64> FeedThemes;
};
REGISTER_MAPPER(TGenerateFakeRecordsMapper)

// Reducer over offer log rows of one reduce group.
// Output 0 receives a single TFeedHistoryRecord with aggregated offer/error
// statistics and up to MAX_ERRORS_BY_TYPE sample errors per error code.
// Output 1 receives TFeedHistoryArchiveRecord rows carrying a CSV dump of all
// accepted errors, split into numbered partitions.
// Groups whose first row has an empty feed produce no output at all.
struct TOfferLogsReducer : public NYT::IReducer<NYT::TTableReader<NProto::TOfferLogRecord>, NYT::TTableWriter<::google::protobuf::Message>> {
public:
    TOfferLogsReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        static const int OUTPUT_HISTORY = 0;
        static const int OUTPUT_LAST_STATE = 1;
        // first line of the CSV archive
        static const TString HEADER = "\"code\",\"severity\",\"line\",\"column\",\"url\",\"message\",\"context\"\n";
        // error codes that are neither stored, counted nor archived
        static const THashSet<TString> IGNORED_ERRORS = {"Images.StillWaiting"};
        static const NYT::TNode::TListType EMPTY_LIST = NYT::TNode::CreateList().AsList();

        const TString feed = input->GetRow().GetFeed();
        if (feed.Empty()) {
            return;
        }

        NProto::TFeedHistoryRecord result;
        // NOTE(review): presumably a compressing buffer (declared in wmcutil/compress.h) — confirm
        NUtils::TChunk gzChunk;
        gzChunk.Write(HEADER.data(), HEADER.size());
        // error code -> list node with the sample errors of that code
        THashMap<TString, NYT::TNode> errors;

        // host comes from the first row; when it is empty it is derived from the feed url
        TString host = input->GetRow().GetWmcHost();
        if (host.empty()) {
            host = NUtils::FixDomainPrefix(NUtils::GetHostWithoutScheme(feed));
        }

        bool feedStatsProcessed = false;
        // number of offers per row status; the four reported keys are pre-created
        // because they are read with at() below
        THashMap<TString, ui64> offerStats;
        offerStats["error"] = 0;
        offerStats["info"] = 0;
        offerStats["warning"] = 0;
        offerStats["ok"] = 0;
        // number of accepted errors per severity
        THashMap<TString, ui64> errorStats;
        errorStats["error"] = 0;
        errorStats["info"] = 0;
        errorStats["warning"] = 0;
        errorStats["ok"] = 0;

        for (; input->IsValid(); input->Next()) {
            const NProto::TOfferLogRecord& row = input->GetRow();
            bool postpone = row.GetAction()  == ACTION_POSTPONE;
            const TString itemUrl = row.GetUrl();
            if (itemUrl.empty() && !postpone) {
                // feed-level row (no item url, not postponed): take the record identity from it
                result.SetHash(row.GetHash());
                result.SetLastAccess(row.GetLastAccess());
                result.SetTimestamp(row.GetTimestamp());
                //result.SetStats(row.GetStats());
                feedStatsProcessed = true;
            }
            // rows seen before the first feed-level row are ignored
            if (!feedStatsProcessed) {
                continue;
            }
            NYT::TNode::TListType rawErrors = row.GetErrors().empty() ? EMPTY_LIST : NYT::NodeFromYsonString(row.GetErrors()).AsList();
            // errors of this row that have a code and are not in IGNORED_ERRORS
            size_t validErrors = 0;
            for (NYT::TNode& error : rawErrors) {
                error(FIELD_ITEM_URL, itemUrl);
                const TString &code = NYTUtils::GetNodeFieldOrDefault<TString>(error, FIELD_CODE_PUBLIC, "");
                if (code.empty() || IGNORED_ERRORS.contains(code)) {
                    continue;
                }
                validErrors++;
                if (errors[code].IsUndefined()) {
                    errors[code] = NYT::TNode::CreateList();
                }
                // keep only the first MAX_ERRORS_BY_TYPE samples per code
                if (errors[code].Size() < MAX_ERRORS_BY_TYPE) {
                    // same value as itemUrl, which was already stored in the node above
                    error[FIELD_ITEM_URL] = row.GetUrl();
                    errors[code].Add(error);
                }
                // every accepted error becomes one CSV line; code, severity, line and
                // column are written as is, the text fields go through EscapeText
                const NYT::TNode &details = NYTUtils::GetNodeFieldOrDefault<NYT::TNode>(error, "details", EMPTY_DETAILS);
                TStringBuilder csvBuilder;
                csvBuilder
                    << code << ","
                    << NYTUtils::GetNodeFieldOrDefault<TString>(error, "severity", "") << ","
                    << NYTUtils::GetNodeFieldOrDefault<i64>(details, "line", 0) << ","
                    << NYTUtils::GetNodeFieldOrDefault<i64>(details, "column", 0) << ","
                    << EscapeText(row.GetUrl()) << ","
                    << EscapeText(NYTUtils::GetNodeFieldOrDefault<TString>(error, "message", "")) << ","
                    << EscapeText(NYTUtils::GetNodeFieldOrDefault<TString>(details, "context", "")) << "\n";
                gzChunk.Write(csvBuilder.data(), csvBuilder.size());

                // once the chunk reports overflow it is written out as the next
                // archive partition and cleared
                if (gzChunk.Overflow()) {
                    NProto::TFeedHistoryArchiveRecord downloadRecord;
                    downloadRecord.SetHost(host);
                    downloadRecord.SetFeed(feed);
                    downloadRecord.SetTimestamp(result.GetTimestamp());
                    downloadRecord.SetLastAccess(result.GetLastAccess());
                    downloadRecord.SetPartition(ToString(gzChunk.No++));
                    downloadRecord.SetData(TString(gzChunk.Data(), gzChunk.Size()));
                    output->AddRow(downloadRecord, OUTPUT_LAST_STATE);
                    gzChunk.Clear();
                }

                // an error without a severity is counted as "error" here, although the
                // CSV line above prints an empty severity for it
                errorStats[NYTUtils::GetNodeFieldOrDefault<TString>(error, "severity", "error")]++;
            }
            // item rows are counted by status unless all of their errors were rejected
            if (!itemUrl.empty() && (rawErrors.size() == 0 || validErrors > 0)) {
                offerStats[row.GetStatus()]++;
            }
        }
        // the history record is written only when a feed-level row was seen
        if (feedStatsProcessed) {
            result.SetStats(NYT::NodeToYsonString(NYT::TNode()
                    ("error", offerStats.at("error"))
                    ("warning", offerStats.at("warning"))
                    ("info", offerStats.at("info"))
                    ("ok", offerStats.at("ok"))
                )
            );
            result.SetErrorStats(NYT::NodeToYsonString(NYT::TNode()
                    ("error", errorStats.at("error"))
                    ("warning", errorStats.at("warning"))
                    ("info", errorStats.at("info"))
                    ("ok", errorStats.at("ok"))
                )
            );
            result.SetErrors(NYT::NodeToYsonString(NYT::TNode::CreateMap(errors)));
            result.SetFeed(feed);
            result.SetHost(host);
            output->AddRow(result, OUTPUT_HISTORY);
        }
        // the remaining archive data is written regardless of feedStatsProcessed;
        // NOTE(review): without a feed-level row the record carries the unset
        // timestamp/last access of `result` — confirm this is intended
        gzChunk.Finish();
        if (gzChunk.Size() > 0) {
            NProto::TFeedHistoryArchiveRecord downloadRecord;
            downloadRecord.SetHost(host);
            downloadRecord.SetFeed(feed);
            downloadRecord.SetTimestamp(result.GetTimestamp());
            downloadRecord.SetLastAccess(result.GetLastAccess());
            downloadRecord.SetPartition(ToString(gzChunk.No++));
            downloadRecord.SetData(TString(gzChunk.Data(), gzChunk.Size()));
            output->AddRow(downloadRecord, OUTPUT_LAST_STATE);
        }
    }
}; // TOfferLogsReducer

REGISTER_REDUCER(TOfferLogsReducer)

struct TSerpdataAndSetsJoinReducer : public TTaggedReducer {
public:
    TSerpdataAndSetsJoinReducer() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TVector<NProto::TFeedSerpdataRecord> serpdataRecords;
        TVector<TString> hosts;
        for (; reader.IsValid(); reader.Next()) {
            if (reader.IsCurrentTable(IntmSerpdataInputTag)) {
                serpdataRecords.emplace_back(reader.GetRow(IntmSerpdataInputTag));
            } else {
                hosts.emplace_back(reader.GetRow(SetWithFeedInputTag).GetHost());
            }
        }
        for (NProto::TFeedSerpdataRecord serpdataRecord : serpdataRecords) {
            for (const TString &host : hosts) {
                serpdataRecord.SetHost(host);
                writer.AddRow(serpdataRecord, IntmSerpdataOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TSerpdataAndSetsJoinReducer)

// Reducer over serpdata rows of one reduce group.
// Output 0 receives a single TFeedHistoryRecord with per-severity error
// counters and up to MAX_ERRORS_BY_TYPE sample errors per error code.
// Output 1 receives TFeedHistoryArchiveRecord rows carrying a CSV dump of all
// errors that have a code, split into numbered partitions.
struct TSerpdataLogsReducer : public NYT::IReducer<NYT::TTableReader<NProto::TFeedSerpdataRecord>, NYT::TTableWriter<::google::protobuf::Message>> {
public:
    TSerpdataLogsReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        static const int OUTPUT_HISTORY = 0;
        static const int OUTPUT_LAST_STATE = 1;
        // first line of the CSV archive
        static const TString HEADER = "\"code\",\"severity\",\"line\",\"column\",\"url\",\"message\",\"context\"\n";

        NProto::TFeedHistoryRecord result;
        NUtils::TChunk gzChunk;
        gzChunk.Write(HEADER.data(), HEADER.size());

        // Writes the current content of gzChunk as the next archive partition.
        // Shared by the overflow flush inside the loop and the final flush.
        const auto writeArchivePartition = [&result, &gzChunk, output]() {
            NProto::TFeedHistoryArchiveRecord downloadRecord;
            downloadRecord.SetHost(result.GetHost());
            downloadRecord.SetFeed(result.GetFeed());
            downloadRecord.SetTimestamp(result.GetTimestamp());
            downloadRecord.SetLastAccess(result.GetLastAccess());
            downloadRecord.SetPartition(ToString(gzChunk.No++));
            downloadRecord.SetData(TString(gzChunk.Data(), gzChunk.Size()));
            output->AddRow(downloadRecord, OUTPUT_LAST_STATE);
        };

        // error code -> list node with the sample errors of that code
        THashMap<TString, NYT::TNode> errors;
        // number of errors per severity; the four reported keys are pre-created
        // because they are read with at() below
        THashMap<TString, ui64> errorStats;
        errorStats["error"] = 0;
        errorStats["info"] = 0;
        errorStats["warning"] = 0;
        errorStats["ok"] = 0;
        for (; input->IsValid(); input->Next()) {
            const NProto::TFeedSerpdataRecord &row = input->GetRow();
            // the record identity is taken from the rows while the result host is still empty
            if (result.GetHost().empty()) {
                result.SetHost(row.GetHost());
                result.SetFeed(row.GetFeedUrl());
                result.SetType(row.GetTheme());
                result.SetTimestamp(row.GetTimestamp());
                result.SetLastAccess(row.GetTimestamp());
            }
            if (row.GetError().empty()) {
                continue; // feeds_indexation_finish or total
            }
            NYT::TNode error = NYT::NodeFromYsonString(row.GetError());
            const TString &code = NYTUtils::GetNodeFieldOrDefault<TString>(error, FIELD_CODE_PUBLIC, "");
            if (code.empty()) {
                continue;
            }
            if (errors[code].IsUndefined()) {
                errors[code] = NYT::TNode::CreateList();
            }
            // keep only the first MAX_ERRORS_BY_TYPE samples per code
            if (errors[code].Size() < MAX_ERRORS_BY_TYPE) {
                error[FIELD_ITEM_URL] = row.GetUrl();
                errors[code].Add(error);
            }
            // The shared EMPTY_DETAILS constant is used as the default (as in
            // TOfferLogsReducer) instead of a per-row temporary map: it outlives
            // the `details` reference and is not rebuilt for every row.
            const NYT::TNode &details = NYTUtils::GetNodeFieldOrDefault<NYT::TNode>(error, "details", EMPTY_DETAILS);
            TStringBuilder csvBuilder;
            csvBuilder
                << code << ","
                << NYTUtils::GetNodeFieldOrDefault<TString>(error, "severity", "") << ","
                << NYTUtils::GetNodeFieldOrDefault<i64>(details, "line", 0) << ","
                << NYTUtils::GetNodeFieldOrDefault<i64>(details, "column", 0) << ","
                << EscapeText(row.GetUrl()) << ","
                << EscapeText(NYTUtils::GetNodeFieldOrDefault<TString>(error, "message", "")) << ","
                << EscapeText(NYTUtils::GetNodeFieldOrDefault<TString>(details, "context", "")) << "\n";
            gzChunk.Write(csvBuilder.data(), csvBuilder.size());
            // NOTE(review): an error without a severity is counted under "" here (and
            // therefore never reported), while TOfferLogsReducer counts it as "error" — confirm
            errorStats[NYTUtils::GetNodeFieldOrDefault<TString>(error, "severity", "")]++;

            // once the chunk reports overflow it is written out and cleared
            if (gzChunk.Overflow()) {
                writeArchivePartition();
                gzChunk.Clear();
            }
        }
        // both Stats and ErrorStats are filled from the same per-severity counters
        result.SetStats(NYT::NodeToYsonString(NYT::TNode()
                ("error", errorStats.at("error"))
                ("warning", errorStats.at("warning"))
                ("info", errorStats.at("info"))
                ("ok", errorStats.at("ok"))
            )
        );
        result.SetErrorStats(NYT::NodeToYsonString(NYT::TNode()
                ("error", errorStats.at("error"))
                ("warning", errorStats.at("warning"))
                ("info", errorStats.at("info"))
                ("ok", errorStats.at("ok"))
            )
        );
        result.SetErrors(NYT::NodeToYsonString(NYT::TNode::CreateMap(errors)));
        output->AddRow(result, OUTPUT_HISTORY);
        // write whatever is left in the chunk as the last partition
        gzChunk.Finish();
        if (gzChunk.Size() > 0) {
            writeArchivePartition();
        }
    }
};
REGISTER_REDUCER(TSerpdataLogsReducer)

// Reducer that merges new history rows (any input table except index 1) with
// the previous last state (input table index 1).
// A new row goes to output 0 when its LastAccess differs from the LastAccess
// of the row read just before it; the last row of the group, whatever table it
// came from, goes to output 1.
struct TFeedsHistoryReducer : public NYT::IReducer<NYT::TTableReader<NProto::TFeedHistoryRecord>, NYT::TTableWriter<NProto::TFeedHistoryRecord>> {
public:
    TFeedsHistoryReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        static const int LAST_STATE_TABLE_INDEX = 1;
        static const int HISTORY_OUTPUT = 0;
        static const int LAST_STATE_OUTPUT = 1;

        NProto::TFeedHistoryRecord current;
        i64 previousLastAccess = 0;
        while (input->IsValid()) {
            current = input->GetRow();
            const bool isNewRow = input->GetTableIndex() != LAST_STATE_TABLE_INDEX;
            const bool lastAccessChanged = previousLastAccess != current.GetLastAccess();
            if (isNewRow && lastAccessChanged) {
                output->AddRow(current, HISTORY_OUTPUT);
            }
            // rows of the last state table also update the reference value
            previousLastAccess = current.GetLastAccess();
            input->Next();
        }
        output->AddRow(current, LAST_STATE_OUTPUT);
    }
};
REGISTER_REDUCER(TFeedsHistoryReducer)

// Reducer that deduplicates and expires archive records: only the first row of
// every reduce group is considered, and it is kept only when its timestamp is
// not older than 14 days.
struct TFeedsHistoryArchiveReducer : public NYT::IReducer<NYT::TTableReader<NProto::TFeedHistoryArchiveRecord>, NYT::TTableWriter<NProto::TFeedHistoryArchiveRecord>> {
public:
    TFeedsHistoryArchiveReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        static const i64 ARCHIVE_TTL_SECONDS = 14 * 86400;
        // The cutoff is computed in 64 bits: storing Now().Seconds() in a 32-bit
        // signed integer narrows the value and overflows in 2038.
        const i64 minTimestamp = static_cast<i64>(Now().Seconds()) - ARCHIVE_TTL_SECONDS;
        // deduplication and filtering
        const NProto::TFeedHistoryArchiveRecord &record = input->GetRow();
        if (static_cast<i64>(record.GetTimestamp()) >= minTimestamp) {
            output->AddRow(record);
        }
    }
};
REGISTER_REDUCER(TFeedsHistoryArchiveReducer)

// Appends to inputTables every table from dir that has not been processed yet.
// A table qualifies when its path ends with "/<digits>", it has records, and
// its name is greater (as a string) than the value stored in progressAttr on
// config.TABLE_DESTINATION_ROOT; a missing or unreadable attribute is treated
// as an empty name.
// Returns the greatest name among the appended tables, or an empty string when
// nothing was appended.
TString FillInputTables(const NYT::ITransactionPtr tx, const TString &dir, const TString &progressAttr, TVector<NYT::TRichYPath> &inputTables) {
    const auto& config = TConfig::CInstance();

    TDeque<NYTUtils::TTableInfo> candidates;
    NYTUtils::GetTableList(tx, dir, candidates);
    std::sort(candidates.rbegin(), candidates.rend(), NYTUtils::TTableInfo::TNameGreater());

    // read the progress marker; failing to read it means "nothing processed yet"
    TString lastProcessedName;
    try {
        lastProcessedName = NYTUtils::GetAttr(tx, config.TABLE_DESTINATION_ROOT, progressAttr).AsString();
    } catch (const yexception &) {
        lastProcessedName = "";
    }
    LOG_INFO("Last processed source for %s is %s", dir.c_str(), lastProcessedName.c_str());

    TRegularExpression numericNamePattern("/(\\d+)$");
    TVector<TString> matches;
    TString newestName;
    for (const auto &candidate : candidates) {
        if (!numericNamePattern.GetMatches(candidate.Name, matches) || !(candidate.RecordCount > 0)) {
            continue;
        }
        const TString &shortName = NYTUtils::GetTableName(candidate.Name);
        if (!(shortName > lastProcessedName)) {
            continue;
        }
        inputTables.push_back(candidate.Name);
        if (newestName < shortName) {
            newestName = shortName;
        }
    }
    return newestName;
}

// Processes all new offer log tables (regular and turbo sources) inside one
// transaction:
//   1. map-reduces the raw logs into a fresh history table and an intermediate archive table;
//   2. merges them with the existing last state and archive tables;
//   3. sorts the resulting tables and advances the progress attributes.
// Returns 0; when there are no new source tables the transaction is committed
// without any changes.
int ProcessOfferLogs(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();
    LOG_INFO("Processing offer logs");

    // both sources are collected into the same list of input tables
    TString newLastProcessedName, newLastProcessedTurboName;
    TVector<NYT::TRichYPath> inputTables;
    newLastProcessedName = FillInputTables(tx, config.TABLE_SOURCE_LOGS_ROOT, LAST_PROCESSED_SOURCE_ATTR, inputTables);
    newLastProcessedTurboName = FillInputTables(tx, config.TABLE_SOURCE_TURBO_LOGS_ROOT, LAST_PROCESSED_TURBO_SOURCE_ATTR, inputTables);

    if (inputTables.empty()) {
        LOG_INFO("No new data. Exiting");
        tx->Commit();
        return 0;
    }

    // tables definitions
    // every run writes its own history table named after the current time in milliseconds
    TString historyTableName = NYTUtils::JoinPath(config.TABLE_FEEDS_HISTORY, ToString(Now().MilliSeconds()));
    TTable<NProto::TFeedHistoryRecord> history(tx, NYT::TRichYPath(historyTableName)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryRecord>()));
    TTable<NProto::TFeedHistoryRecord> lastState(tx, NYT::TRichYPath(config.TABLE_FEEDS_LAST_STATE)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryRecord>()));
    TTable<NProto::TFeedHistoryArchiveRecord> archive(tx, NYT::TRichYPath(config.TABLE_FEEDS_ARCHIVE)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryArchiveRecord>()));
    TTable<NProto::TFeedHistoryArchiveRecord> intmArchive(tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "intm-feeds-archive"))
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryArchiveRecord>()));

    LOG_INFO("Converting raw offer logs");
    // process raw offer-logs: output 0 is the history, output 1 is the intermediate archive
    TMapReduceCmd<TExtractTimestampMapper, TOfferLogsReducer> mapReduceCmd(tx);
    for (const NYT::TRichYPath &input : inputTables) {
        mapReduceCmd.Input<NProto::TOfferLogRecord>(input);
    }
    mapReduceCmd
        .Output(history)
        .Output(intmArchive)
        .ReduceBy({FIELD_FEED, FIELD_TIMESTAMP})
        .SortBy({FIELD_FEED, FIELD_TIMESTAMP, FIELD_HASH})
        .ReducerMemoryLimit(1_GBs)
        .MaxRowWeight(128_MBs)
        .Do();

    // the reduce operations below need their inputs sorted by these columns
    DoParallel(
        TSortCmd<NProto::TFeedHistoryRecord>(tx, history).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryArchiveRecord>(tx, intmArchive).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS})
    );

    // reduce history and archive
    DoParallel(
        TReduceCmd<TFeedsHistoryReducer>(tx)
            .Input(history)
            .Input(lastState.IfExists())
            .Output(history)
            .Output(lastState)
            .ReduceBy({FIELD_HOST, FIELD_FEED})
            .SortBy({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS})
            .MemoryLimit(1_GBs),
        TReduceCmd<TFeedsHistoryArchiveReducer>(tx)
            .Input(intmArchive)
            .Input(archive.IfExists())
            .Output(archive)
            .ReduceBy({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP})
            .SortBy({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP})
            .MemoryLimit(1_GBs)
            .MaxRowWeight(128_MBs)
    );

    DoParallel(
        TSortCmd<NProto::TFeedHistoryRecord>(tx, history).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryRecord>(tx, lastState).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryArchiveRecord>(tx, archive).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS})
    );

    // FillInputTables returns an empty name for a source without new tables.
    // Such a source keeps its current progress attribute: overwriting it with
    // an empty value would make the next run reprocess all of its tables.
    if (!newLastProcessedName.empty()) {
        NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_SOURCE_ATTR, newLastProcessedName);
    }
    if (!newLastProcessedTurboName.empty()) {
        NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_TURBO_SOURCE_ATTR, newLastProcessedTurboName);
    }
    intmArchive.Drop();

    LOG_INFO("Finished successfully");
    tx->Commit();
    return 0;
}

int ProcessSerpdataLogs(int, const char **) {
    const TConfig &config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    LOG_INFO("Processing serpdata logs");

    TString newLastProcessedByFeedName, newLastProcessedTotalName, newLastFeedsIndexationName;
    TVector<NYT::TRichYPath> totalTables, feedTables;
    newLastProcessedTotalName = FillInputTables(tx, config.TABLE_SOURCE_SERPDATA_TOTAL_LOGS_ROOT, LAST_PROCESSED_TOTAL_SOURCE_ATTR, totalTables);
    // for each total table read one row for theme
    THashMap<TString, ui64> totalThemes, feedThemes;
    for (const NYT::TRichYPath &input : totalTables) {
        auto reader = TTable<NProto::TFeedSerpdataRecord>(tx, input).GetSingleRowByIndexReader(0);
        if (reader->IsValid()) {
            NProto::TFeedSerpdataRecord record = reader->GetRow();
            totalThemes[record.GetTheme()] = FromString<i64>(NYTUtils::GetTableName(input.Path_)) / 1000;
         }
    }
    newLastProcessedByFeedName = FillInputTables(tx, config.TABLE_SOURCE_SERPDATA_BY_FEED_LOGS_ROOT, LAST_PROCESSED_BY_FEED_SOURCE_ATTR, feedTables);
    for (const NYT::TRichYPath &input : feedTables) {
        auto reader = TTable<NProto::TFeedSerpdataRecord>(tx, input).GetSingleRowByIndexReader(0);
        if (reader->IsValid()) {
            NProto::TFeedSerpdataRecord record = reader->GetRow();
            feedThemes[record.GetTheme()] = FromString<i64>(NYTUtils::GetTableName(input.Path_)) / 1000;
        }
    }
    if (totalTables.empty() && feedTables.empty()) {
        LOG_INFO("No new data. Exiting");
        tx->Commit();
        return 0;
    }

    // tables definitions
    TString historyTableName = NYTUtils::JoinPath(config.TABLE_SERPDATA_HISTORY, ToString(Now().MilliSeconds()));
    TTable<NProto::TFeedHistoryRecord> history(tx, NYT::TRichYPath(historyTableName)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryRecord>()));
    TTable<NProto::TFeedHistoryRecord> lastState(tx, NYT::TRichYPath(config.TABLE_SERPDATA_LAST_STATE)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryRecord>()));
    TTable<NProto::TFeedHistoryArchiveRecord> archive(tx, NYT::TRichYPath(config.TABLE_SERPDATA_ARCHIVE)
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryArchiveRecord>()));
    TTable<NProto::TFeedHistoryArchiveRecord> intmArchive(tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "intm-serpdata-archive"))
        .Schema(NYT::CreateTableSchema<NProto::TFeedHistoryArchiveRecord>()));
    TTable<NProto::TFeedSerpdataRecord> hosts(tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "intm-hosts"))
        .Schema(NYT::CreateTableSchema<NProto::TFeedSerpdataRecord>()));

    LOG_INFO("Converting raw offer logs");
    // process raw offer-logs
    // tables timestamps
    THashMap<size_t, ui64> totalTs;
    THashMap<size_t, ui64> byFeedTs;
    for (const NYT::TRichYPath &input : totalTables) {
        totalTs[totalTs.size()] = FromString<i64>(NYTUtils::GetTableName(input.Path_)) / 1000;
    }
    for (const NYT::TRichYPath &input : feedTables) {
        byFeedTs[byFeedTs.size()] = FromString<i64>(NYTUtils::GetTableName(input.Path_)) / 1000;
    }
    // варим из last-state фейковую таблицу безошибочных состояний
    TMapCmd<TGenerateFakeRecordsMapper>(tx, new TGenerateFakeRecordsMapper(totalThemes, feedThemes))
        .Input(lastState)
        .Output(hosts)
        .Do();

    TTable<NProto::TFeedSerpdataRecord> intmTotal(
        tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "serpdata/intm-total"))
        .Schema(NYT::CreateTableSchema<NProto::TFeedSerpdataRecord>()));
    TTable<NProto::TFeedSerpdataRecord> intmByFeed(
        tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "serpdata/intm-by-feed"))
            .Schema(NYT::CreateTableSchema<NProto::TFeedSerpdataRecord>()));
    TMapCmd<TExtractSerpdataTimestampMapper> extractTotalTsCmd(tx, new TExtractSerpdataTimestampMapper(totalTs));
    TMapCmd<TExtractSerpdataTimestampMapper> extractByFeedTsCmd(tx, new TExtractSerpdataTimestampMapper(byFeedTs));
    for (const NYT::TRichYPath &input : totalTables) {
        extractTotalTsCmd.Input<NProto::TFeedSerpdataRecord>(input);
    }
    for (const NYT::TRichYPath &input : feedTables) {
        extractByFeedTsCmd.Input<NProto::TFeedSerpdataRecord>(input);
    }

    TTable<NProto::TSetWithFeedRecord> nativeFeedsSorted(tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "tmp-native-feeds"))
        .Schema(NYT::CreateTableSchema<NProto::TSetWithFeedRecord>()));
    TTable<NProto::TSetWithFeedRecord> offerBaseSorted(tx, NYT::TRichYPath(NYTUtils::JoinPath(config.TABLE_DESTINATION_ROOT, "tmp-offer-base"))
        .Schema(NYT::CreateTableSchema<NProto::TSetWithFeedRecord>()));
    // prepare data for real webmaster domain (relations feed->domain, set-domain).
    DoParallel(
        TSortCmd<NProto::TSetWithFeedRecord>(tx)
            .Input<NYT::TNode>(NYT::TRichYPath(config.TABLE_EXPORT_NATIVE_FEEDS).RenameColumns({{"url", "feed_url"}}).Columns({"feed_url", "domain"}))
            .Output<NProto::TSetWithFeedRecord>(nativeFeedsSorted)
            .By({"feed_url", "domain"}),
        TMapReduceCmd<TExtractSetUrlsMapper, TRemoveDuplicatesReducer>(tx)
            .Input<NProto::TOfferBaseRecord>(NYT::TRichYPath(config.TABLE_SOURCE_OFFER_BASE).Columns({"feed_url", "sets"}))
            .Output<NProto::TSetWithFeedRecord>(offerBaseSorted)
            .ReduceBy({"feed_url", "url", "domain"}),
        extractTotalTsCmd.Output(intmTotal).MemoryLimit(1_GBs).MaxRowWeight(128_MBs),
        extractByFeedTsCmd.Output(intmByFeed).MemoryLimit(1_GBs).MaxRowWeight(128_MBs)
    );
    // sort offerBaseSorted for joining with native feeds export
    DoParallel(
        TSortCmd<NProto::TSetWithFeedRecord>(tx, offerBaseSorted).By({"feed_url"}),
        TReduceCmd<TRemoveDuplicatesReducer>(tx)
            .Input<NProto::TSetWithFeedRecord>(nativeFeedsSorted)
            .Output<NProto::TSetWithFeedRecord>(nativeFeedsSorted)
            .ReduceBy({"feed_url", "domain"}),
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, intmTotal).By("url"),
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, intmByFeed).By("feed_url")
    );
    TSortCmd<NProto::TSetWithFeedRecord>(tx, nativeFeedsSorted).By("feed_url").Do();
    // join offer base with wmc feeds
    TReduceCmd<TJoinOfferBaseSetsWithFeeds>(tx)
        .Input(nativeFeedsSorted, NativeFeedsInputTag)
        .Input(offerBaseSorted, OfferBaseSetsInputTag)
        .Output(offerBaseSorted, SetsWithFeedsOutputTag)
        .ReduceBy("feed_url")
        .MemoryLimit(1_GBs)
        .Do();

    TSortCmd<NProto::TSetWithFeedRecord>(tx, offerBaseSorted).By({"url", "domain"}).Do();
    TReduceCmd<TRemoveDuplicatesReducer>(tx)
        .Input<NProto::TSetWithFeedRecord>(offerBaseSorted)
        .Output<NProto::TSetWithFeedRecord>(offerBaseSorted)
        .ReduceBy({"url", "domain"})
        .Do();
    TSortCmd<NProto::TSetWithFeedRecord>(tx, offerBaseSorted).By({"url", "domain"}).Do();
    // join serpdata with real webmaster domains
    DoParallel(
        TReduceCmd<TSerpdataAndSetsJoinReducer>(tx)
            .Input(intmTotal, IntmSerpdataInputTag)
            .Input(offerBaseSorted, SetWithFeedInputTag)
            .Output(intmTotal, IntmSerpdataOutputTag)
            .ReduceBy("url")
            .MemoryLimit(1_GBs)
            .MaxRowWeight(128_MBs),
        TReduceCmd<TSerpdataAndSetsJoinReducer>(tx)
            .Input(intmByFeed, IntmSerpdataInputTag)
            .Input(nativeFeedsSorted, SetWithFeedInputTag)
            .Output(intmByFeed, IntmSerpdataOutputTag)
            .ReduceBy("feed_url")
            .MemoryLimit(1_GBs)
            .MaxRowWeight(128_MBs)
    );
    // join real webmaster domains
    DoParallel(
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, hosts).By({"service", "feed_url", "theme", FIELD_TIMESTAMP}),
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, intmTotal).By({"service", "feed_url", "theme", FIELD_TIMESTAMP}),
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, intmByFeed).By({"service", "feed_url", "theme", FIELD_TIMESTAMP})
    );

    // history
    TReduceCmd<TSerpdataLogsReducer>(tx)
        .Input<NProto::TFeedSerpdataRecord>(hosts)
        .Input<NProto::TFeedSerpdataRecord>(intmTotal)
        .Input<NProto::TFeedSerpdataRecord>(intmByFeed)
        .Output(history)
        .Output(intmArchive)
        .ReduceBy({"service", "feed_url", "theme", FIELD_TIMESTAMP})
        .MemoryLimit(1_GBs)
        .MaxRowWeight(128_MBs)
        .Do();

    DoParallel(
        TSortCmd<NProto::TFeedHistoryRecord>(tx, history).By({FIELD_HOST, FIELD_FEED, FIELD_TYPE, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryArchiveRecord>(tx, intmArchive).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedSerpdataRecord>(tx, hosts).By({"service", "theme"})
    );

    // reduce history and archive
    DoParallel(
        TReduceCmd<TFeedsHistoryReducer>(tx)
            .Input(history)
            .Input(lastState.IfExists())
            .Output(history)
            .Output(lastState)
            .ReduceBy({FIELD_HOST, FIELD_FEED, FIELD_TYPE})
            .SortBy({FIELD_HOST, FIELD_FEED, FIELD_TYPE, FIELD_TIMESTAMP, FIELD_LAST_ACCESS})
            .MemoryLimit(1_GBs),
        TReduceCmd<TFeedsHistoryArchiveReducer>(tx)
            .Input(intmArchive)
            .Input(archive.IfExists())
            .Output(archive)
            .ReduceBy({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP})
            .SortBy({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP})
            .MemoryLimit(1_GBs)
            .MaxRowWeight(128_MBs)
    );

    DoParallel(
        TSortCmd<NProto::TFeedHistoryRecord>(tx, history).By({FIELD_HOST, FIELD_FEED, FIELD_TYPE, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryRecord>(tx, lastState).By({FIELD_HOST, FIELD_FEED, FIELD_TYPE, FIELD_TIMESTAMP, FIELD_LAST_ACCESS}),
        TSortCmd<NProto::TFeedHistoryArchiveRecord>(tx, archive).By({FIELD_HOST, FIELD_FEED, FIELD_TIMESTAMP, FIELD_LAST_ACCESS})
    );

    NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_BY_FEED_SOURCE_ATTR, newLastProcessedByFeedName);
    NYTUtils::SetAttr(tx, config.TABLE_DESTINATION_ROOT, LAST_PROCESSED_TOTAL_SOURCE_ATTR, newLastProcessedTotalName);
    // drop temp tables
    intmArchive.Drop();
    nativeFeedsSorted.Drop();
    offerBaseSorted.Drop();
    intmTotal.Drop();
    intmByFeed.Drop();
    hosts.Drop();

    LOG_INFO("Finished successfully");
    tx->Commit();
    return 0;
}

} //namespace NFeeds
} //namespace NWebmaster

