#include <util/datetime/base.h>
#include <util/digest/fnv.h>
#include <util/generic/size_literals.h>
#include <util/string/cast.h>
#include <util/string/escape.h>
#include <util/string/split.h>
#include <util/string/vector.h>

#include <library/cpp/charset/doccodes.h>
#include <library/cpp/mime/types/mime.h>

#include <mapreduce/lib/all.h>

#include <robot/jupiter/protos/acceptance.pb.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/indexing/hostinfo/conf/config.h>
#include <wmconsole/version3/processors/indexing/hostinfo/protos/hostinfo.pb.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "task_host_statistics.h"

namespace NWebmaster {

using namespace NJupiter;

namespace {
// Column names used as reduce/sort keys by the operations in TaskUpdateHostStatistics.
const char *F_HOST = "Host";
const char *F_PATH = "Path";
const char *F_HASH = "Hash";
const char *F_COUNT_NEGATIVE = "CountNegative";
// Maximum number of prevailing (Count > 1) title/description rows kept per host by TContentAttrStatisticsTopReducer.
const int MAX_PREVAILING_CONTENT_SAMPLES = 1000;
// Maximum number of prevailing titles/descriptions kept inside one THostStatistics row.
const int MAX_PREVAILING_CONTENT_SAMPLES_IN_STATS = 10;
// Maximum number of page samples kept per title/description value by TContentAttrStatisticsMerger.
const int MAX_SAMPLES_PER_CONTENT = 100000;
// Maximum number of page samples copied per prevailing title/description into THostStatistics.
const int MAX_SAMPLES_PER_CONTENT_IN_STATS = 10;
// Maximum number of page samples kept per sample list of THostStatistics.
const int MAX_SAMPLES = 1000;
// Pages whose fetch time is at least this value are reported as slow.
const int SLOW_FETCH_TIME_LIMIT = 3000; // ms

// Input table tags; the numeric ids distinguish the inputs of the tagged mappers/reducers below.
TInputTag <NJupiter::TAcceptanceUrlForWebMasterRecord> JupiterUrldatInputTag(1);
TInputTag <NJupiter::TContentAttrsForWebmaster> JupiterContentAttrsInputTag(2);
TInputTag <NJupiter::TUrldatForWebmaster> JupiterSpreadInputTag(3);
TInputTag <NProto::THostStatistics> HostStatisticsInputTag(4);
TInputTag <NProto::THostContentAttrStatistics> ContentAttrStatisticsInputTag(5);
TInputTag <NProto::THostContentAttrStatistics> TitleStatisticsInputTag(6);
TInputTag <NProto::THostContentAttrStatistics> DescrStatisticsInputTag(7);

// Output table tags.
TOutputTag <NProto::THostStatistics> HostStatisticsOutputTag(10);
TOutputTag <NProto::THostContentAttrStatistics> TitleStatisticsOutputTag(11);
TOutputTag <NProto::THostContentAttrStatistics> DescrStatisticsOutputTag(12);
TOutputTag <NProto::THostContentAttrStatistics> ContentAttrStatisticsOutputTag(13);
}

using namespace NJupiter;

struct THostInfoPreprocessor : public TTaggedReducer {
public:
    THostInfoPreprocessor() = default;

    THostInfoPreprocessor(const THashSet <TString> &webmasterHosts)
        : WebmasterHosts(webmasterHosts) {
    }

    void Save(IOutputStream &stream) const override {
        ::Save(&stream, WebmasterHosts);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream &stream) override {
        ::Load(&stream, WebmasterHosts);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        const TMaybe <TAcceptanceUrlForWebMasterRecord> mbJupiter = reader.GetSingleRowMaybe(JupiterUrldatInputTag);
        const TMaybe <TContentAttrsForWebmaster> mbContent = reader.GetSingleRowMaybe(JupiterContentAttrsInputTag);
        // last spread record
        TMaybe <TUrldatForWebmaster> mbSpread;
        for (TUrldatForWebmaster data : reader.GetRows(JupiterSpreadInputTag)) {
            if (mbSpread.Empty() || data.GetLastAccess() > mbSpread->GetLastAccess()) {
                mbSpread = data;
            }
        }
        if (mbJupiter.Empty() && mbSpread.Empty()) {
            return;
        }
        const TString &host = mbJupiter ? mbJupiter->GetHost() : mbSpread->GetHost();
        const TString &path = mbJupiter ? mbJupiter->GetPath() : mbSpread->GetPath();
        if (!WebmasterHosts.contains(host)) {
            return;
        }

        NProto::THostStatistics stats;
        stats.SetHost(host);
        stats.SetDocs(1);
        if (mbSpread.Defined()) {
            i64 fetchTime = mbSpread->GetFetchTime() / 1000;
            if (fetchTime > 0) {
                stats.SetPagesWithFetchTime(1);
                stats.SetTotalFetchTime(fetchTime);
            }
            if (fetchTime >= SLOW_FETCH_TIME_LIMIT) {
                NProto::TPageSample sample;
                sample.SetPath(path);
                sample.SetHttpCode(mbSpread->GetHttpCode());
                sample.SetLastAccess(mbSpread->GetLastAccess());
                sample.SetFetchTime(fetchTime);
                stats.SetSlowPages(1);
                *stats.MutableSamples()->AddSlowPageSamples() = sample;
            }
        }

        if (mbJupiter.Defined()) {
            bool isSearchable = mbJupiter->GetIsSearchable();
            stats.SetDocsOnSearch(isSearchable ? 1 : 0);
            bool isMimeHtml = mbJupiter->GetMimeType() == MIME_HTML;
            stats.SetHtmlDocs(isMimeHtml ? 1 : 0);
            EAcceptanceUrlForWebMasterSimpleStatus urlStatus = static_cast<EAcceptanceUrlForWebMasterSimpleStatus>(mbJupiter->GetUrlStatus());
            ui32 httpCode = mbJupiter->GetHttpCode();
            bool isCanonical = mbJupiter->GetRelCanonicalTarget().empty() || mbJupiter->GetRelCanonicalTarget() == (host + path);

            if (isMimeHtml && httpCode == 200 && isCanonical && urlStatus == NJupiter::AUFWSS_SEMIDUP) {
                stats.SetDuplicatePages(1);
                NProto::TPageSample sample;
                sample.SetPath(path);
                sample.SetLastAccess(mbContent.Defined() ? mbContent->GetLastAccess() : mbJupiter->GetLastAccess());
                sample.SetMainUrl(mbJupiter->GetMainHost() + mbJupiter->GetMainPath());
                *stats.MutableSamples()->AddDuplicatePageSamples() = sample;
            }
            if (isMimeHtml && isSearchable && httpCode == 200 && urlStatus == NJupiter::AUFWSS_INDEXED) {
                stats.SetGoodDocsOnSearch(isCanonical ? 1 : 0);
                NProto::TPageSample sample;
                sample.SetPath(path);
                sample.SetLastAccess(mbContent.Defined() ? mbContent->GetLastAccess() : mbJupiter->GetLastAccess());
                const TString &title = Strip(mbContent->GetTitleRawUTF8());
                if (mbContent.Empty() || title.empty()) {
                    stats.SetEmptyTitles(1);
                    *stats.MutableSamples()->AddEmptyTitleSamples() = sample;
                } else if (isCanonical) {
                    // store title stats
                    NProto::THostContentAttrStatistics cas;
                    cas.SetHost(host);
                    cas.SetHash(FnvHash<i64>(ToUpperUTF8(title)));
                    cas.SetValue(WideToUTF8(UTF8ToWide(title).substr(0, 1024)));
                    cas.SetCount(1);
                    *cas.MutableSamples()->AddPageSamples() = sample;
                    writer.AddRow(cas, TitleStatisticsOutputTag);
                }
                const TString &description = Strip(mbContent->GetMetaDescription());
                if (mbContent.Empty() || description.empty()) {
                    stats.SetEmptyDescriptions(1);
                    *stats.MutableSamples()->AddEmptyDescriptionSamples() = sample;
                } else if (isCanonical) {
                    NProto::THostContentAttrStatistics cas;
                    cas.SetHost(host);
                    cas.SetHash(FnvHash<i64>(ToUpperUTF8(description)));
                    cas.SetValue(WideToUTF8(UTF8ToWide(description).substr(0, 1024)));
                    cas.SetCount(1);
                    *cas.MutableSamples()->AddPageSamples() = sample;
                    writer.AddRow(cas, DescrStatisticsOutputTag);
                }
            }
        }
        // morda
        if (path == "/") {
            NProto::TPageSample sample;
            sample.SetPath(path);
            sample.SetHttpCode(mbSpread ? mbSpread->GetHttpCode() : mbJupiter->GetHttpCode());
            sample.SetLastAccess(mbSpread ? mbSpread->GetLastAccess() : mbJupiter->GetLastAccess());
            *stats.MutableSamples()->MutableMordaSample() = sample;
        }

        writer.AddRow(stats, HostStatisticsOutputTag);
    }

private:
    THashSet <TString> WebmasterHosts;
};
REGISTER_REDUCER(THostInfoPreprocessor)

// Appends the page samples of `src` to `dst` and keeps `dst` bounded by `limit`.
//
// With a non-null `src` the samples are merged first, and the list is sorted and cut only
// once it has grown beyond twice the limit. A null `src` means "finalize": nothing is
// merged and `dst` is always sorted and cut to `limit`.
// Sorting puts the samples with the greatest LastAccess first, so those are the ones kept.
void CopyPageSamples(::google::protobuf::RepeatedPtrField <NProto::TPageSample>* dst,
                     const ::google::protobuf::RepeatedPtrField <NProto::TPageSample>* const src,
                     int limit) {
    const bool finalizeOnly = (src == nullptr);
    if (!finalizeOnly) {
        dst->MergeFrom(*src);
        // Regular merge: postpone the sort until the list outgrows twice the limit.
        if (dst->size() <= (limit * 2)) {
            return;
        }
    }

    const auto newestFirst = [](const NProto::TPageSample &lhs, const NProto::TPageSample &rhs) {
        return lhs.GetLastAccess() > rhs.GetLastAccess();
    };
    std::sort(dst->begin(), dst->end(), newestFirst);

    if (dst->size() > limit) {
        dst->Truncate(limit);
    }
}

// Appends the title/description samples of `src` to `dst` and keeps `dst` bounded by `limit`.
//
// With a non-null `src` the samples are merged first, and the list is sorted and cut only
// once it has grown beyond twice the limit. A null `src` means "finalize": nothing is
// merged and `dst` is always sorted and cut to `limit`.
// Sorting puts the samples with the greatest Count first, so those are the ones kept.
void CopyContentAttrsSamples(::google::protobuf::RepeatedPtrField <NProto::TContentAttrSample>* dst,
                             const ::google::protobuf::RepeatedPtrField <NProto::TContentAttrSample>* const src,
                             int limit) {
    const bool finalizeOnly = (src == nullptr);
    if (!finalizeOnly) {
        dst->MergeFrom(*src);
        // Regular merge: postpone the sort until the list outgrows twice the limit.
        if (dst->size() <= (limit * 2)) {
            return;
        }
    }

    const auto mostFrequentFirst = [](const NProto::TContentAttrSample &lhs, const NProto::TContentAttrSample &rhs) {
        return lhs.GetCount() > rhs.GetCount();
    };
    std::sort(dst->begin(), dst->end(), mostFrequentFirst);

    if (dst->size() > limit) {
        dst->Truncate(limit);
    }
}

// Folds the statistics row `src` into the accumulator `dst`.
// The host is overwritten with the one from `src`, every counter is summed, the main-page
// sample of `src` replaces the accumulated one when present, and every sample list is
// merged through CopyPageSamples / CopyContentAttrsSamples with its own limit.
void MergeHostStatistics(NProto::THostStatistics &dst, const NProto::THostStatistics &src) {
    dst.SetHost(src.GetHost());

    // Document counters.
    dst.SetDocs(dst.GetDocs() + src.GetDocs());
    dst.SetDocsOnSearch(dst.GetDocsOnSearch() + src.GetDocsOnSearch());
    dst.SetHtmlDocs(dst.GetHtmlDocs() + src.GetHtmlDocs());
    dst.SetGoodDocsOnSearch(dst.GetGoodDocsOnSearch() + src.GetGoodDocsOnSearch());

    // Empty title / description counters.
    dst.SetEmptyTitles(dst.GetEmptyTitles() + src.GetEmptyTitles());
    dst.SetEmptyDescriptions(dst.GetEmptyDescriptions() + src.GetEmptyDescriptions());

    // Fetch time counters.
    dst.SetSlowPages(dst.GetSlowPages() + src.GetSlowPages());
    dst.SetPagesWithFetchTime(dst.GetPagesWithFetchTime() + src.GetPagesWithFetchTime());
    dst.SetTotalFetchTime(dst.GetTotalFetchTime() + src.GetTotalFetchTime());

    // Unique / duplicate counters.
    dst.SetUniqueTitles(dst.GetUniqueTitles() + src.GetUniqueTitles());
    dst.SetUniqueDescriptions(dst.GetUniqueDescriptions() + src.GetUniqueDescriptions());
    dst.SetDuplicateTitles(dst.GetDuplicateTitles() + src.GetDuplicateTitles());
    dst.SetDuplicateDescriptions(dst.GetDuplicateDescriptions() + src.GetDuplicateDescriptions());
    dst.SetDuplicatePages(dst.GetDuplicatePages() + src.GetDuplicatePages());

    auto &merged = *dst.MutableSamples();
    const auto &incoming = src.GetSamples();

    // The main-page sample is a single value: the incoming one wins when it is set.
    if (incoming.HasMordaSample()) {
        *merged.MutableMordaSample() = incoming.GetMordaSample();
    }

    CopyPageSamples(merged.MutableEmptyTitleSamples(), &incoming.GetEmptyTitleSamples(), MAX_SAMPLES);
    CopyPageSamples(merged.MutableEmptyDescriptionSamples(), &incoming.GetEmptyDescriptionSamples(), MAX_SAMPLES);
    CopyPageSamples(merged.MutableSlowPageSamples(), &incoming.GetSlowPageSamples(), MAX_SAMPLES);
    CopyContentAttrsSamples(merged.MutablePrevailingTitles(), &incoming.GetPrevailingTitles(), MAX_PREVAILING_CONTENT_SAMPLES_IN_STATS);
    CopyContentAttrsSamples(merged.MutablePrevailingDescriptions(), &incoming.GetPrevailingDescriptions(), MAX_PREVAILING_CONTENT_SAMPLES_IN_STATS);
    CopyPageSamples(merged.MutableDuplicatePageSamples(), &incoming.GetDuplicatePageSamples(), MAX_SAMPLES);
}

struct THostStatisticsMerger : public TTaggedReducer {
public:
    THostStatisticsMerger() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::THostStatistics result = reader.GetRow(HostStatisticsInputTag);
        reader.Next();
        for (; reader.IsValid(); reader.Next()) {
            const NProto::THostStatistics &stats = reader.GetRow(HostStatisticsInputTag);
            MergeHostStatistics(result, stats);
        }
        auto resultSamples = result.MutableSamples();
        CopyPageSamples(resultSamples->MutableEmptyTitleSamples(), nullptr, MAX_SAMPLES);
        CopyPageSamples(resultSamples->MutableEmptyDescriptionSamples(), nullptr, MAX_SAMPLES);
        CopyPageSamples(resultSamples->MutableSlowPageSamples(), nullptr, MAX_SAMPLES);
        CopyContentAttrsSamples(resultSamples->MutablePrevailingTitles(), nullptr, MAX_PREVAILING_CONTENT_SAMPLES_IN_STATS);
        CopyContentAttrsSamples(resultSamples->MutablePrevailingDescriptions(), nullptr, MAX_PREVAILING_CONTENT_SAMPLES_IN_STATS);
        CopyPageSamples(resultSamples->MutableDuplicatePageSamples(), nullptr, MAX_SAMPLES);
        writer.AddRow(result, HostStatisticsOutputTag);
    }
};

REGISTER_REDUCER(THostStatisticsMerger)

struct TContentAttrStatisticsMerger : public TTaggedReducer {
public:
    TContentAttrStatisticsMerger() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::THostContentAttrStatistics result = reader.GetRow(ContentAttrStatisticsInputTag);
        reader.Next();
        for (; reader.IsValid(); reader.Next()) {
            NProto::THostContentAttrStatistics stat = reader.GetRow(ContentAttrStatisticsInputTag);
            result.SetCount(result.GetCount() + stat.GetCount());
            CopyPageSamples(result.MutableSamples()->MutablePageSamples(), &stat.GetSamples().GetPageSamples(), MAX_SAMPLES_PER_CONTENT);
        }
        CopyPageSamples(result.MutableSamples()->MutablePageSamples(), nullptr, MAX_SAMPLES_PER_CONTENT);
        result.SetCountNegative(-result.GetCount());
        result.SetUniques(1);
        result.SetDuplicates(result.GetCount() > 1 ? result.GetCount() : 0);
        writer.AddRow(result, ContentAttrStatisticsOutputTag);
    }
};

REGISTER_REDUCER(TContentAttrStatisticsMerger)

struct TContentAttrStatisticsTopReducer : public TTaggedReducer {
public:
    TContentAttrStatisticsTopReducer() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // just save top MAX_PREVAILING_CONTENT_SAMPLES records
        size_t count = 0;
        i64 uniques = 0;
        i64 duplicates = 0;
        TString host;
        for (; reader.IsValid(); reader.Next()) {
            const NProto::THostContentAttrStatistics &stats = reader.GetRow(ContentAttrStatisticsInputTag);
            if (host.empty()) {
                host = stats.GetHost();
            }
            if (stats.GetCount() == 0) { // fake record for storing uniques and duplicates
                uniques += stats.GetUniques();
                duplicates += stats.GetDuplicates();
            } else {
                uniques += stats.GetUniques();
                duplicates += stats.GetDuplicates();
                if (count < MAX_PREVAILING_CONTENT_SAMPLES && stats.GetCount() > 1) {
                    // в примере обнулим uniques и duplicates, чтобы они не учитывались дважды
                    NProto::THostContentAttrStatistics statsCopy(stats);
                    statsCopy.SetUniques(0);
                    statsCopy.SetDuplicates(0);
                    writer.AddRow(statsCopy, ContentAttrStatisticsOutputTag);
                    count++;
                }
            }
        }
        NProto::THostContentAttrStatistics fakeStats;
        fakeStats.SetHost(host);
        fakeStats.SetUniques(uniques);
        fakeStats.SetDuplicates(duplicates);
        fakeStats.SetCountNegative(0);
        writer.AddRow(fakeStats, ContentAttrStatisticsOutputTag);
    }
};

REGISTER_REDUCER(TContentAttrStatisticsTopReducer)

struct TContentAttrStatisticsToHostStatisticsMapper : public TTaggedMapper {
public:
    TContentAttrStatisticsToHostStatisticsMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        for (; reader.IsValid(); reader.Next()) {
            if (reader.IsCurrentTable(HostStatisticsInputTag)) {
                writer.AddRow(reader.GetRow(HostStatisticsInputTag), HostStatisticsOutputTag);
            } else if (reader.IsCurrentTable(TitleStatisticsInputTag)) {
                NProto::THostStatistics result;
                NProto::THostContentAttrStatistics stat = reader.GetRow(TitleStatisticsInputTag);
                result.SetHost(stat.GetHost());
                result.SetUniqueTitles(stat.GetUniques());
                result.SetDuplicateTitles(stat.GetDuplicates());
                if (stat.GetCount() > 0) {
                    NProto::TContentAttrSample *sample = result.MutableSamples()->AddPrevailingTitles();
                    sample->SetValue(stat.GetValue());
                    sample->SetCount(stat.GetCount());
                    int sampleCount = std::min(MAX_SAMPLES_PER_CONTENT_IN_STATS, stat.GetSamples().GetPageSamples().size());
                    for (int i = 0; i < sampleCount; i++) {
                        *sample->AddPageSamples() = stat.GetSamples().GetPageSamples(i);
                    }
                }
                writer.AddRow(result, HostStatisticsOutputTag);
            } else if (reader.IsCurrentTable(DescrStatisticsInputTag)) {
                NProto::THostStatistics result;
                NProto::THostContentAttrStatistics stat = reader.GetRow(DescrStatisticsInputTag);
                result.SetHost(stat.GetHost());
                result.SetUniqueDescriptions(stat.GetUniques());
                result.SetDuplicateDescriptions(stat.GetDuplicates());
                if (stat.GetCount() > 0) {
                    NProto::TContentAttrSample *sample = result.MutableSamples()->AddPrevailingDescriptions();
                    sample->SetValue(stat.GetValue());
                    sample->SetCount(stat.GetCount());
                    int sampleCount = std::min(MAX_SAMPLES_PER_CONTENT_IN_STATS, stat.GetSamples().GetPageSamples().size());
                    for (int i = 0; i < sampleCount; i++) {
                        *sample->AddPageSamples() = stat.GetSamples().GetPageSamples(i);
                    }
                }
                writer.AddRow(result, HostStatisticsOutputTag);
            }
        }
    }
};

REGISTER_MAPPER(TContentAttrStatisticsToHostStatisticsMapper)

// Wraps a table name into a rich YT path; every input of the preprocessing step goes
// through here. For debugging, uncomment one of the ranges below to restrict the input
// to the rows of a single host key.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://znanija.com"))));
//    richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://xn--80apizf6d.009.xn--p1ai"))));
    return richPath;
}

// Entry point of the host statistics task: rebuilds the host statistics, titles and
// descriptions tables from the current Jupiter acceptance table, the content-attrs table
// and the spread tables.
//
// All table modifications are made inside one YT transaction that is committed at the end.
// Returns 0 both when the acceptance table was already processed (nothing is done) and on
// success. Throws yexception when the Webmaster hosts table cannot be loaded.
int TaskUpdateHostStatistics(int argc, const char **argv) {
    NYT::Initialize(argc, argv);
    const auto &cfg = NHostInfo::TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_JUPITER);

    const TString jupiterTable = GetJupiterAcceptanceInProdTable(client);

    // Skip the run when the trigger reports that this acceptance table needs no update.
    TYtSourceTrigger tableTrigger(client, cfg.TABLE_EXPORT_HOST_STATISTICS);
    if (!tableTrigger.NeedUpdate(jupiterTable)) {
        LOG_INFO("host_statistics, acceptance table %s is already processed", jupiterTable.data());
        return 0;
    }

    // Hosts registered in Webmaster; the preprocessor drops pages of all other hosts.
    // NOTE(review): 500000 is presumably the minimal acceptable number of hosts - confirm
    // against NYTUtils::LoadWebmastersHosts.
    THashSet <TString> webmasterHosts;
    if (!NYTUtils::LoadWebmastersHosts(client, cfg.TABLE_SOURCE_WEBMASTER_HOSTS, webmasterHosts, 500000)) {
        ythrow yexception() << "there is problem with webmaster hosts table";
    }

    NYT::ITransactionPtr tx = client->StartTransaction();

    const TString contentAttrsTable = GetJupiterContentAttrsInProdTable(tx);
    auto hostStatisticsTable = TTable<NProto::THostStatistics>(tx, cfg.TABLE_EXPORT_HOST_STATISTICS);
    auto titleStatisticsTable = TTable<NProto::THostContentAttrStatistics>(tx,
        NYTUtils::JoinPath(cfg.TABLE_EXPORT_HOSTINFO_ROOT, "titles"));
    auto descrStatisticsTable = TTable<NProto::THostContentAttrStatistics>(tx,
        NYTUtils::JoinPath(cfg.TABLE_EXPORT_HOSTINFO_ROOT, "descriptions"));

    // Move the previous result tables into the history directory, suffixed with the current
    // time in milliseconds.
    // NOTE(review): Now() is evaluated separately for every Move, so the three history
    // tables of one run may get different suffixes.
    const TString historyRoot = NYTUtils::JoinPath(cfg.TABLE_EXPORT_HOSTINFO_ROOT, "history");
    tx->Move(cfg.TABLE_EXPORT_HOST_STATISTICS, NYTUtils::JoinPath(historyRoot, "host_statistics." + ToString(Now().MilliSeconds())));
    tx->Move(titleStatisticsTable.GetPath().Path_, NYTUtils::JoinPath(historyRoot, "titles." + ToString(Now().MilliSeconds())));
    tx->Move(descrStatisticsTable.GetPath().Path_, NYTUtils::JoinPath(historyRoot, "descriptions." + ToString(Now().MilliSeconds())));

    // Stage 1: per-page preprocessing. Inputs are the acceptance table, the content-attrs
    // table and every loaded spread table; the reduce key is (Host, Path).
    TReduceCmd <THostInfoPreprocessor> preprocessor(tx, new THostInfoPreprocessor(webmasterHosts));
    preprocessor.Input(TTable<TAcceptanceUrlForWebMasterRecord>(tx, DebugPath(jupiterTable)), JupiterUrldatInputTag);
    preprocessor.Input(TTable<TContentAttrsForWebmaster>(tx, DebugPath(contentAttrsTable)),
                       JupiterContentAttrsInputTag);

    TDeque <NYTUtils::TTableInfo> spreadTables;
    LoadSpreadTables(tx, 0, spreadTables);
    LOG_INFO("Loaded %lu spread tables", spreadTables.size());
    for (const NYTUtils::TTableInfo &table : spreadTables) {
        preprocessor.Input(TTable<TUrldatForWebmaster>(tx, DebugPath(table.Name)), JupiterSpreadInputTag);
    }
    LOG_INFO("Preprocessing jupiter, content and spread tables");
    preprocessor
        .Output(hostStatisticsTable, HostStatisticsOutputTag)
        .Output(titleStatisticsTable, TitleStatisticsOutputTag)
        .Output(descrStatisticsTable, DescrStatisticsOutputTag)
        .ReduceBy({F_HOST, F_PATH})
        .MemoryLimit(2_GBs)
        .Do();

    // Stage 2: collapse the per-page rows in place - host statistics by Host, titles and
    // descriptions by (Host, Hash). The three operations are passed to DoParallel together.
    LOG_INFO("Merging host, titles and descriptions statistics");
    DoParallel(
        TCombineReduceCmd<THostStatisticsMerger, THostStatisticsMerger>(tx)
            .Input(hostStatisticsTable, HostStatisticsInputTag)
            .IntermediateCombineInputTag(HostStatisticsInputTag)
            .IntermediateCombineOutputTag(HostStatisticsOutputTag)
            .IntermediateReduceTag(HostStatisticsInputTag)
            .Output(hostStatisticsTable, HostStatisticsOutputTag)
            .ReduceBy({F_HOST})
            .CombinerMemoryLimit(2_GBs)
            .ReducerMemoryLimit(2_GBs)
            .MaxRowWeight(128_MBs),
        TCombineReduceCmd<TContentAttrStatisticsMerger, TContentAttrStatisticsMerger>(tx)
            .Input(titleStatisticsTable, ContentAttrStatisticsInputTag)
            .IntermediateCombineInputTag(ContentAttrStatisticsInputTag)
            .IntermediateCombineOutputTag(ContentAttrStatisticsOutputTag)
            .IntermediateReduceTag(ContentAttrStatisticsInputTag)
            .Output(titleStatisticsTable, ContentAttrStatisticsOutputTag)
            .ReduceBy({F_HOST, F_HASH})
            .CombinerMemoryLimit(2_GBs)
            .ReducerMemoryLimit(2_GBs)
            .MaxRowWeight(128_MBs),
        TCombineReduceCmd<TContentAttrStatisticsMerger, TContentAttrStatisticsMerger>(tx)
            .Input(descrStatisticsTable, ContentAttrStatisticsInputTag)
            .IntermediateCombineInputTag(ContentAttrStatisticsInputTag)
            .IntermediateCombineOutputTag(ContentAttrStatisticsOutputTag)
            .IntermediateReduceTag(ContentAttrStatisticsInputTag)
            .Output(descrStatisticsTable, ContentAttrStatisticsOutputTag)
            .ReduceBy({F_HOST, F_HASH})
            .CombinerMemoryLimit(2_GBs)
            .ReducerMemoryLimit(2_GBs)
            .MaxRowWeight(128_MBs)
    );

    // Stage 3: per host keep only the prevailing titles/descriptions plus one totals row.
    // Rows are sorted by CountNegative within a host, i.e. most frequent values first.
    // NOTE(review): the log message below contains a typo ("descripttions").
    LOG_INFO("Collecting prevailing titles and descripttions");
    DoParallel(
        TCombineReduceCmd<TContentAttrStatisticsTopReducer, TContentAttrStatisticsTopReducer>(tx)
            .Input(titleStatisticsTable, ContentAttrStatisticsInputTag)
            .IntermediateCombineInputTag(ContentAttrStatisticsInputTag)
            .IntermediateCombineOutputTag(ContentAttrStatisticsOutputTag)
            .IntermediateReduceTag(ContentAttrStatisticsInputTag)
            .Output(titleStatisticsTable, ContentAttrStatisticsOutputTag)
            .ReduceBy({F_HOST})
            .SortBy({F_HOST, F_COUNT_NEGATIVE})
            .CombinerMemoryLimit(2_GBs)
            .ReducerMemoryLimit(2_GBs)
            .MaxRowWeight(128_MBs),
        TCombineReduceCmd<TContentAttrStatisticsTopReducer, TContentAttrStatisticsTopReducer>(tx)
            .Input(descrStatisticsTable, ContentAttrStatisticsInputTag)
            .IntermediateCombineInputTag(ContentAttrStatisticsInputTag)
            .IntermediateCombineOutputTag(ContentAttrStatisticsOutputTag)
            .IntermediateReduceTag(ContentAttrStatisticsInputTag)
            .Output(descrStatisticsTable, ContentAttrStatisticsOutputTag)
            .ReduceBy({F_HOST})
            .SortBy({F_HOST, F_COUNT_NEGATIVE})
            .CombinerMemoryLimit(2_GBs)
            .ReducerMemoryLimit(2_GBs)
            .MaxRowWeight(128_MBs)
    );

    // Stage 4: convert the title/description rows to THostStatistics and merge them with
    // the host statistics rows, one resulting row per host.
    LOG_INFO("Merging total host statistics");
    TMapReduceCmd<TContentAttrStatisticsToHostStatisticsMapper, THostStatisticsMerger>(tx)
        .Input(hostStatisticsTable, HostStatisticsInputTag)
        .Input(titleStatisticsTable, TitleStatisticsInputTag)
        .Input(descrStatisticsTable, DescrStatisticsInputTag)
        .IntermediateMapTag(HostStatisticsOutputTag)
        .IntermediateReduceTag(HostStatisticsInputTag)
        .Output(hostStatisticsTable, HostStatisticsOutputTag)
        .ReduceBy({F_HOST})
        .CombinerMemoryLimit(2_GBs)
        .ReducerMemoryLimit(2_GBs)
        .MaxRowWeight(128_MBs)
        .Do();

    // Stage 5: sort the three result tables by their keys.
    LOG_INFO("Sorting result host statistics");
    DoParallel(
        TSortCmd<NProto::THostStatistics>(tx, hostStatisticsTable).By(F_HOST),
        TSortCmd<NProto::THostContentAttrStatistics>(tx, titleStatisticsTable).By({F_HOST, F_COUNT_NEGATIVE}),
        TSortCmd<NProto::THostContentAttrStatistics>(tx, descrStatisticsTable).By({F_HOST, F_COUNT_NEGATIVE})
    );

    // Record the processed acceptance table inside the same transaction, then commit.
    tableTrigger.Update(tx, jupiterTable);
    tx->Commit();
    LOG_INFO("Finished successfully");
    return 0;
}
} // namespace NWebmaster
