#include <util/generic/set.h>
#include <util/generic/size_literals.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/processors/tools/protos/user_urls.pb.h>
#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/library/source_tables.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/user_sessions/protos/reports.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "limits.h"
#include "task_archive.h"
#include "task_hosts.h"
#include "task_last.h"

namespace NWebmaster {
namespace NReports {

using namespace NJupiter;

// Tag under which TLastQueriesMergeReducer reads its input rows (NUserSessions::NProto::TQuery).
// NOTE(review): the numeric argument looks like a tag id that has to differ between tags — confirm in tags.h.
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionsInputTag  (1);

// Tags of the two reducer outputs: per-key archive rows and top-URL report rows.
static const TOutputTag<NProto::TArchiveSource> ArchiveSourceOutputTag      (2);
static const TOutputTag<NProto::TTopUrlReport> TopUrlReportOutputTag        (3);

// Returns the list of user-sessions columns that this task selects from its input tables.
// The list is built once on first use and shared by all callers.
const NYT::TSortColumns &FIELDS_URLS_QUERIES() {
    static const NYT::TSortColumns selectedColumns = {
        // Reduce key columns.
        "Host", "CorrectedQuery", "Path",
        // Per-row payload.
        "Clicks", "Cm2",
        "IsMetrikaRobot", "IsMobile", "IsPad",
        "Position", "RegionId",
        "RequestSource", "ResultSource",
        "Shows", "Timestamp", "UIDFraudRank",
    };
    return selectedColumns;
}

// Maps a search result position to its position bucket:
//   below 2 -> GROUP_1, 2..3 -> GROUP_2_3, 4..10 -> GROUP_4_10, 11..50 -> GROUP_11_50, otherwise GROUP_51_MAX.
static proto::queries2::QueryPositionGroup PositionToGroup(int position) {
    if (position >= 51) {
        return proto::queries2::GROUP_51_MAX;
    }
    if (position >= 11) {
        return proto::queries2::GROUP_11_50;
    }
    if (position >= 4) {
        return proto::queries2::GROUP_4_10;
    }
    if (position >= 2) {
        return proto::queries2::GROUP_2_3;
    }
    return proto::queries2::GROUP_1;
}

//ReduceBy Host, CorrectedQuery, Path
struct TLastQueriesMergeReducer : public TTaggedReducer {
public:
    TLastQueriesMergeReducer() = default;
    TLastQueriesMergeReducer(const THashSet<TString> &webmasterHosts, const TVector<time_t> &tableConfig)
        : WebmasterHosts(webmasterHosts)
        , TableConfig(tableConfig)
    {
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, WebmasterHosts);
        ::Save(&stream, TableConfig);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, WebmasterHosts);
        ::Load(&stream, TableConfig);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        const ui32 MAX_QUERIES_COUNT = Max<ui32>();

        TMaybe<NUserSessions::NProto::TQuery> firstRow = reader.GetRowMaybe(UserSessionsInputTag);
        if (!WebmasterHosts.contains(firstRow->GetHost())) {
            return;
        }

        const TString host = firstRow->GetHost();
        const TString path = firstRow->GetPath();
        const TString query = firstRow->GetCorrectedQuery();
        const TString url = host + path;

        THashMap<time_t, TArchiveCounter> archiveCounters;
        TMap<proto::queries2::QueryPositionGroup, TLastQueriesRecord> groups;
        for (time_t ts : TableConfig) {
            archiveCounters[ts];
        }
        for (; reader.IsValid(); reader.Next()) {
            const auto &row = reader.GetRow(UserSessionsInputTag);
            if (!NUserSessions::IsVisibleQueryInWebmaster(row)) {
                continue;
            }

            if (row.GetPosition() >= 50) {
                continue;
            }

            TLastQueriesRecord &record = groups[PositionToGroup(row.GetPosition())];
            record.ShowsCount += row.GetShows();
            record.ClicksCount += row.GetClicks();
            record.ShowsPositionProduct += row.GetShows() * row.GetPosition();
            record.ClicksPositionProduct += row.GetClicks() * row.GetPosition();

            const time_t timestamp = TableConfig.at(reader.GetTableIndex());
            archiveCounters[timestamp].Add(row);
        }

        if (groups.size() == 0) {
            return;
        }

        TMap<time_t, TArchiveCounter> archiveCountersSorted(archiveCounters.begin(), archiveCounters.end());
        NProto::TArchiveSource dstMsg;
        dstMsg.SetHost(host);
        dstMsg.SetPath(path);
        dstMsg.SetQuery(query);
        for (const auto &obj : archiveCountersSorted) {
            NProto::TArchiveRecord *record = dstMsg.MutableRecords()->AddRecords();
            const time_t timestamp = obj.first;
            const auto &counter = obj.second;
            record->SetTimestamp(timestamp);
            counter.Serialize(*record);
        }
        writer.AddRow(dstMsg, ArchiveSourceOutputTag);

        proto::queries2::QueryWeightedAggrInfo msgDst;
        ui32 clicksCount = 0;
        ui32 showsCount = 0;
        for (const auto &obj : groups) {
            proto::queries2::QueryPositionGroupAggrInfo *qpgai = msgDst.add_per_group_info();
            qpgai->set_group(obj.first);
            qpgai->set_clicks_count(obj.second.ClicksCount);
            qpgai->set_shows_count(obj.second.ShowsCount);
            qpgai->set_clicks_position_product(obj.second.ClicksPositionProduct);
            qpgai->set_shows_position_product(obj.second.ShowsPositionProduct);
            clicksCount += obj.second.ClicksCount;
            showsCount += obj.second.ShowsCount;
        }
        msgDst.set_clicks_count(clicksCount);
        msgDst.set_shows_count(showsCount);
        msgDst.set_url(url);
        msgDst.set_query(query);
        TString stream;
        Y_PROTOBUF_SUPPRESS_NODISCARD msgDst.SerializeToString(&stream);

        NProto::TTopUrlReport dstRow;
        dstRow.SetKey(host);
        dstRow.SetSubkey(Sprintf("%08X-%08X", MAX_QUERIES_COUNT - showsCount, MAX_QUERIES_COUNT - clicksCount));
        dstRow.SetValue(stream);
        writer.AddRow(dstRow, TopUrlReportOutputTag);
    }

public:
    THashSet<TString> WebmasterHosts;
    TVector<time_t> TableConfig;

}; //TLastQueriesMergeReducer

REGISTER_REDUCER(TLastQueriesMergeReducer)

// Wraps a table name into a rich YPath. This is the single place where the input
// can be narrowed while debugging.
static NYT::TRichYPath DebugPath(const TString &table) {
    // Debug hint: build the path as a local and restrict it before returning, e.g.
    //   path.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.drive2.ru"))));
    return NYT::TRichYPath(table);
}

int TaskLastQueries(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    const int MAX_QUERIES_COUNT = 17500000;
    const int PERIOD_DAYS_LAST = 7;

    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    TSourceTablesForTarget targetLast(client, PERIOD_DAYS_LAST, cfg.TABLE_REPORTS_LAST_QUERIES);

    if (targetLast.IsUpdated) {
        LOG_INFO("reports, tables are already updated");
        return 0;
    }

    if (!targetLast.IsComplete) {
        LOG_ERROR("reports, source table set is incomplete");
        return 1;
    }

    NYTUtils::CreatePath(client, TConfig::CInstance().TABLE_REPORTS_ROOT);
    NYT::ITransactionPtr tx = client->StartTransaction();

    TVector<time_t> tableConfig;
    TVector<TTable<NUserSessions::NProto::TQuery>> inputs;
    for (auto it = targetLast.Tables.rbegin(); it != targetLast.Tables.rend(); ++it) {
        TTable<NUserSessions::NProto::TQuery> table(tx, DebugPath(it->Name));
        inputs.push_back(table.SelectFields(FIELDS_URLS_QUERIES()));
        LOG_INFO("reports, last_queries, input %s", it->Name.c_str());
        tableConfig.push_back(it->PeriodBegin);
    }

    if (inputs.size() != PERIOD_DAYS_LAST) {
        ythrow yexception() << "reports, last_queries, there is no enough tables";
    }

    THashSet<TString> webmasterHosts = { };
    LoadWebmasterHosts(client, webmasterHosts);

    LOG_INFO("reports, last_queries, output %s", cfg.TABLE_REPORTS_LAST_QUERIES.c_str());
    LOG_INFO("reports, last_queries, output %s", cfg.TABLE_ARCHIVE_INTM_FULL.c_str());
    TReduceCmd<TLastQueriesMergeReducer>(tx, new TLastQueriesMergeReducer(webmasterHosts, tableConfig))
        .Inputs(inputs, UserSessionsInputTag)
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_LAST_QUERIES).AsSortedOutput({"key"}), TopUrlReportOutputTag)
        .Output(TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_FULL).AsSortedOutput({"Host"}), ArchiveSourceOutputTag)
        .MemoryLimit(4_GBs)
        .ReduceBy({"Host", "CorrectedQuery", "Path"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    auto archiveSortOpPtr = TSortCmd<NProto::TArchiveSource>(tx, TTable<NProto::TArchiveSource>(tx, cfg.TABLE_ARCHIVE_INTM_FULL))
        .By({"Host", "Path", "Query"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .DoAsync()
    ;

    TCombineReduceCmd<TRecordsLimitCombineReducer, TRecordsLimitCombineReducer>(
        tx,
        new TRecordsLimitCombineReducer(MAX_QUERIES_COUNT),
        new TRecordsLimitCombineReducer(MAX_QUERIES_COUNT)
    )
        .Input(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_LAST_QUERIES))
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_LAST_QUERIES))
        .ReduceBy({"key"})
        .SortBy({"key", "subkey"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    TSortCmd<NProto::TTopUrlReport>(tx, TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_LAST_QUERIES))
        .By({"key", "subkey"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    archiveSortOpPtr->Watch().Wait();

    Sort(tableConfig.begin(), tableConfig.end());
    NYT::TNode timestamps = NYT::TNode::CreateList();
    for (time_t ts : tableConfig) {
        timestamps.Add(ts);
    }
    SetYtAttr(tx, cfg.TABLE_ARCHIVE_INTM_FULL, TAttrName::Timestamps, timestamps);

    targetLast.UpdateTarget(tx);
    TYtSourceTrigger trigger(tx, cfg.TABLE_ARCHIVE_INTM_FULL);
    trigger.Update(tx, targetLast.GetTarget());
    LOG_INFO("reports, last_queries, done");

    tx->Commit();
    return 0;
}

} //namespace NReports
} //namespace NWebmaster
