#include <util/generic/set.h>
#include <util/generic/size_literals.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/library/compact_counters.h>
#include <wmconsole/version3/processors/user_sessions/library/monitor.h>
#include <wmconsole/version3/processors/user_sessions/library/regions_limiter.h>
#include <wmconsole/version3/processors/user_sessions/library/source_tables.h>
#include <wmconsole/version3/processors/user_sessions/protos/reports.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/wmcutil/compress.h>

#include "limits.h"
#include "task_hosts.h"
#include "task_queries.h"

namespace NWebmaster {
namespace NReports {

using namespace NJupiter;

// Tags that bind the typed inputs and outputs of TFavoriteAndTopQueriesMergeReducer
// to the tables attached to TReduceCmd in TaskFavoriteAndTopQueries.
// Inputs: favorite-queries settings rows (1) and user-sessions query rows (2).
static const TInputTag<NProto::TFavoriteQueriesSettings> FavoriteQueriesSettingsInputTag (1);
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionsInputTag               (2);

// Output: report rows. The same tag is attached to both output tables; the reducer
// selects the concrete table by passing an offset to AddRowWithOffset.
static const TOutputTag<NProto::TTopUrlReport> TopUrlReportOutputTag                     (3);

// Attribute-name constants (judging by the struct name).
// NOTE(review): not referenced in the visible part of this file.
struct TAttrName {
    // constexpr already makes the array elements const, so no extra qualifier is needed.
    static constexpr char Timestamps[] = "timestamps";
};

// Returns the column list that TaskFavoriteAndTopQueries passes to SelectFields()
// for every user-sessions source table. The list is built once on first call and
// the same object is returned afterwards.
const NYT::TSortColumns & FIELDS_QUERIES() {
    static const NYT::TSortColumns columns = {
        "Host", "CorrectedQuery",
        "IsMobile", "IsPad",
        "RegionId", "Position",
        "Clicks", "Shows",
        "RequestSource", "ResultSource",
        "IsMetrikaRobot",
        "Cm2",
        "Timestamp",
        "UIDFraudRank",
    };
    return columns;
}

//ReduceBy Host, CorrectedQuery
// For each reduce key, merges the user-sessions rows of all source tables into one
// serialized proto::queries2::QueryMessage and writes it as a TTopUrlReport row:
//   - output offset 0 (top queries) gets a row for every key that produced counters;
//   - output offset 1 (favorite queries) gets the same row only when a
//     favorite-queries settings row was read for the key.
// Keys whose host is not in WebmasterHosts produce no output.
struct TFavoriteAndTopQueriesMergeReducer : public TTaggedReducer {

public:
    // Default-constructed state is empty; presumably needed so the job can be
    // re-created and then filled by Load() — TODO confirm against TTaggedReducer.
    TFavoriteAndTopQueriesMergeReducer() = default;

    // webmasterHosts: hosts whose keys are processed.
    // tableConfig:    timestamp per input table, indexed by the reader's table index.
    TFavoriteAndTopQueriesMergeReducer(const THashSet<TString> &webmasterHosts, const TVector<time_t> &tableConfig)
        : WebmasterHosts(webmasterHosts)
        , TableConfig(tableConfig)
    {
    }

    // Writes own state first, then the base-class state. Load() reads in the same order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, WebmasterHosts);
        ::Save(&stream, TableConfig);
        TTaggedReducer::Save(stream);
    }

    // Restores the state written by Save(), in the same order.
    void Load(IInputStream& stream) override {
        ::Load(&stream, WebmasterHosts);
        ::Load(&stream, TableConfig);
        TTaggedReducer::Load(stream);
    }

    // Processes the rows of one reduce key (see the struct comment for the contract).
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // Base from which the totals are subtracted when building the subkey (see below).
        const ui32 MAX_QUERIES_COUNT        = Max<ui32>();
        // Offsets among the output tables that share TopUrlReportOutputTag.
        const ui32 TABLENO_TOP_QUERIES      = 0;
        const ui32 TABLENO_FAVORITE_QUERIES = 1;

        // Settings row for this key, if any. Only its presence is used further down.
        TMaybe<NProto::TFavoriteQueriesSettings> favoriteSettingsFilter = reader.GetSingleRowMaybe(FavoriteQueriesSettingsInputTag);
        reader.SkipRows(FavoriteQueriesSettingsInputTag);
        // Presumably true when the key has no user-sessions rows after the settings rows — TODO confirm.
        if (!reader.IsValid()) {
            return;
        }

        // Host, query text and Cm2 are taken from the first user-sessions row of the key.
        TMaybe<NUserSessions::NProto::TQuery> firstRow = reader.GetRowMaybe(UserSessionsInputTag);
        if (!firstRow.Defined()) {
            return;
        }

        const TString host = firstRow.GetRef().GetHost();
        if (!WebmasterHosts.contains(host)) {
            return;
        }

        const TString query = firstRow.GetRef().GetCorrectedQuery();
        const double cm2 = firstRow.GetRef().GetCm2();

        proto::queries2::QueryMessage msgDst;
        msgDst.set_query(query);
        msgDst.set_cm2(cm2);

        TRegionsLimiter limiter;
        NUserSessions::TCompactCounters counters;
        for (const auto &row : reader.GetRows(UserSessionsInputTag)) {
            // Rows with Position >= 50 do not contribute to the counters.
            if (row.GetPosition() >= 50) {
                continue;
            }

            // Feed the region to the limiter; when it requests compaction, compact it
            // and re-compact the accumulated counters against it.
            limiter.Add(row.GetRegionId(), row.GetShows());
            if (limiter.NeedCompaction()) {
                limiter.Compact();
                counters.Compact(limiter);
            }
            // Region id as remapped by the limiter.
            const ui32 actualRegionId = limiter.GetActualId(row.GetRegionId());
            // Timestamp configured for the table the reader currently reports;
            // at() throws if the index is outside TableConfig.
            const time_t timestamp = TableConfig.at(reader.GetTableIndex());

            //Timestamp is patched by TTopUrlsMergeMapper = Table Timestamp
            // NOTE(review): TTopUrlsMergeMapper is not defined in this file; here the
            // timestamp passed to the counters comes from TableConfig.
            counters.AddPatchedRegionId(row, timestamp, actualRegionId);
        }

        // Nothing was accumulated (e.g. every row was skipped by the position filter).
        if (counters.Counters.empty()) {
            return;
        }

        // Final compaction before the counters are written into the message.
        limiter.Compact();
        counters.Compact(limiter);

        TString stream;
        counters.Write(msgDst);
        // The serialization result is deliberately ignored.
        Y_PROTOBUF_SUPPRESS_NODISCARD msgDst.SerializeToString(&stream);

        // Payloads above 127 MiB are passed through NUtils::Compress (presumably in place).
        // The reduce operation is started with MaxRowWeight(128_MBs), so this is
        // presumably what keeps the row under that limit — TODO confirm.
        if (stream.size() > 127 * 1024 * 1024) {
            NUtils::Compress(stream);
        }

        NProto::TTopUrlReport dstRow;
        dstRow.SetKey(host);
        // Fixed-width hex of (Max<ui32>() - total): a larger total gives a smaller string,
        // so ascending subkey order means descending shows, then descending clicks
        // (assuming the totals fit into 32 bits).
        dstRow.SetSubkey(Sprintf("%08X-%08X", MAX_QUERIES_COUNT - counters.Total.Shows, MAX_QUERIES_COUNT - counters.Total.Clicks));
        dstRow.SetValue(stream);
        writer.AddRowWithOffset(dstRow, TopUrlReportOutputTag, TABLENO_TOP_QUERIES);

        // The same row goes to the favorite-queries table when a settings row exists for the key.
        if (favoriteSettingsFilter.Defined()) {
            writer.AddRowWithOffset(dstRow, TopUrlReportOutputTag, TABLENO_FAVORITE_QUERIES);
        }
    }

public:
    // Hosts whose keys are processed; all other keys are skipped.
    THashSet<TString> WebmasterHosts;
    // Timestamp per input table index (the caller stores a -1 stub at index 0).
    TVector<time_t> TableConfig;

}; //TFavoriteAndTopQueriesMergeReducer

REGISTER_REDUCER(TFavoriteAndTopQueriesMergeReducer)

// Mapper that flattens favorite query groups into TFavoriteQueriesSettings rows.
//
// Input:  YAMR rows; the value is parsed as a proto::querygroups::HostGroupInfo.
// Output: for every group flagged as favorite, one row per query in the group with
//         Host = the input key and CorrectedQuery = the pruned query text.
struct TPrepareFavoriteQueriesSettingsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NProto::TFavoriteQueriesSettings>> {
    // Limits text to `length` elements of its UTF-16 representation.
    // Texts strictly shorter than `length` are returned untouched; everything else
    // is cut to the first `length` UTF-16 elements and converted back to UTF-8.
    static TString PruneText(const TString &text, size_t length = 1024) {
        const TUtf16String wide = UTF8ToWide(text);
        if (wide.size() >= length) {
            return WideToUTF8(wide.substr(0, length));
        }
        return text;
    }

    void Do(TReader *input, TWriter *output) override {
        // One message object is reused for all emitted rows.
        NProto::TFavoriteQueriesSettings settingsRow;
        for (; input->IsValid(); input->Next()) {
            const NYT::TYamr &srcRow = input->GetRow();

            // The parse result is deliberately ignored.
            proto::querygroups::HostGroupInfo group;
            Y_PROTOBUF_SUPPRESS_NODISCARD group.ParseFromString(srcRow.GetValue());

            // Only favorite groups contribute rows.
            if (!group.favorite()) {
                continue;
            }

            settingsRow.SetHost(srcRow.GetKey());
            for (const auto &groupQuery : group.queries()) {
                settingsRow.SetCorrectedQuery(PruneText(groupQuery.text()));
                output->AddRow(settingsRow);
            }
        }
    }
}; //TPrepareFavoriteQueriesSettingsMapper

REGISTER_MAPPER(TPrepareFavoriteQueriesSettingsMapper)

//ReduceBy Host, CorrectedQuery
// Deduplicating reducer: writes the row the reader is positioned at when Do() is
// called and reads nothing else, so each key yields exactly one output row.
// UpdateFavoriteQueriesSettings passes it for both the second and the third
// template argument of TMapCombineReduceCmd.
struct TPrepareFavoriteQueriesSettingsReducer : public NYT::IReducer<NYT::TTableReader<NProto::TFavoriteQueriesSettings>, NYT::TTableWriter<NProto::TFavoriteQueriesSettings>> {
    void Do(TReader *input, TWriter *output) override {
        const auto &firstRow = input->GetRow();
        output->AddRow(firstRow);
    }
};

REGISTER_REDUCER(TPrepareFavoriteQueriesSettingsReducer)

// Wraps a table name into a rich YPath. As written it adds nothing to the path;
// the commented-out line is a debugging aid that restricts the read to one key.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.drive2.ru"))));
    return richPath;
}

// Rebuilds the favorite-queries settings table from the source groups settings table.
//
// Runs two operations through `client` (the caller passes a transaction):
//   1. TMapCombineReduceCmd: TPrepareFavoriteQueriesSettingsMapper emits one row per
//      query of every favorite group; TPrepareFavoriteQueriesSettingsReducer (passed
//      for both remaining stages) keeps one row per (Host, CorrectedQuery) key;
//   2. TSortCmd: sorts the resulting table by (Host, CorrectedQuery).
//
// The start and the completion are logged with distinct messages so the two can be
// told apart in the log.
void UpdateFavoriteQueriesSettings(NYT::IClientBasePtr client) {
    const auto &cfg = TConfig::CInstance();

    LOG_INFO("reports, favorite_queries, update settings");

    TMapCombineReduceCmd<
        TPrepareFavoriteQueriesSettingsMapper,
        TPrepareFavoriteQueriesSettingsReducer,
        TPrepareFavoriteQueriesSettingsReducer>
    (
        client
    )
        .Input(TTable<NYT::TYamr>(client, cfg.TABLE_SOURCE_GROUPS_SETTINGS))
        .Output(TTable<NProto::TFavoriteQueriesSettings>(client, cfg.TABLE_REPORTS_FAVORITE_QUERIES_SETTINGS))
        .ReduceBy({"Host", "CorrectedQuery"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    TSortCmd<NProto::TFavoriteQueriesSettings>(client,
        TTable<NProto::TFavoriteQueriesSettings>(client, cfg.TABLE_REPORTS_FAVORITE_QUERIES_SETTINGS)
    )
        .By({"Host", "CorrectedQuery"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    LOG_INFO("reports, favorite_queries, update settings - done");
}

// Pushes two monitoring values for the source tables under the given suffix:
// "Queries_SourceLag" and "Queries_SourceGaps". Both integers are converted to
// double before being pushed. (The caller in this file logs the lag as hours and
// the gaps as days.)
void MonitorSourceTables(const TString &suffix, int mostRecentSourceLag, int holes) {
    const double lagValue = static_cast<double>(mostRecentSourceLag);
    const double gapsValue = static_cast<double>(holes);

    MonitorPushQualityLabel(suffix, "Queries_SourceLag", lagValue);
    MonitorPushQualityLabel(suffix, "Queries_SourceGaps", gapsValue);
}

// Entry point of the favorite/top queries report task. Both arguments are unused.
//
// Flow:
//   1. Build the source-table sets for the two targets; return 0 if both targets are
//      already updated, return 1 if the top-queries source set is incomplete.
//   2. Load webmaster hosts, make sure the reports root exists, push source lag/gap
//      monitoring values.
//   3. Inside one transaction: rebuild the favorite-queries settings table, run
//      TFavoriteAndTopQueriesMergeReducer over the settings table plus all source
//      tables, run TRecordsLimitCombineReducer over the top-queries table, sort both
//      outputs, update both targets, commit.
//
// Returns 0 on success or when there is nothing to do, 1 when sources are incomplete.
int TaskFavoriteAndTopQueries(int, const char **) {
    // Passed to TRecordsLimitCombineReducer; presumably the maximum number of
    // records kept per reduce key — TODO confirm.
    const int MAX_TOP_QUERIES_COUNT = 3000;
    const int PERIOD_DAYS_TOPS      = 84; // 7 days * 12 weeks
    const auto &cfg = TConfig::CInstance();

    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    // The favorite-queries target is constructed from the top-queries one, not from a period.
    TSourceTablesForTarget targetTopQueries         (client, PERIOD_DAYS_TOPS, cfg.TABLE_REPORTS_TOP_QUERIES);
    TSourceTablesForTarget targetFavoriteQueries    (client, targetTopQueries, cfg.TABLE_REPORTS_FAVORITE_QUERIES);

    // Skip the run only when both targets are up to date.
    if (targetTopQueries.IsUpdated && targetFavoriteQueries.IsUpdated) {
        LOG_INFO("reports, favorite/top/queries, are already updated");
        return 0;
    }

    if (!targetTopQueries.IsComplete) {
        LOG_ERROR("reports, source table set is incomplete");
        return 1;
    }

    THashSet<TString> webmasterHosts = { };
    LoadWebmasterHosts(client, webmasterHosts);

    // NOTE(review): uses TConfig::CInstance() directly although `cfg` is available,
    // and runs on `client`, i.e. outside the transaction started below.
    NYTUtils::CreatePath(client, TConfig::CInstance().TABLE_REPORTS_ROOT);
    MonitorSourceTables(cfg.MONITOR_PERFORMANCE_SUFFIX, targetTopQueries.Lag, targetTopQueries.Holes);
    LOG_INFO("reports, favorite/top/queries, source tables total gap %d days", targetTopQueries.Holes);
    LOG_INFO("reports, favorite/top/queries, the most recent source table lag %d hours", targetTopQueries.Lag);

    // All table modifications below go through this transaction.
    NYT::ITransactionPtr tx = client->StartTransaction();
    UpdateFavoriteQueriesSettings(tx);

    // The reducer looks timestamps up by input table index. The first input attached
    // below is the favorite-queries settings table, so index 0 holds a stub
    // (table indexes presumably follow the order in which inputs are attached).
    TVector<time_t> tableConfig = { -1 }; // stub for the favorite-queries settings table
    TVector<TTable<NUserSessions::NProto::TQuery>> inputs;
    for (const auto &source : targetTopQueries.Tables) {
        TTable<NUserSessions::NProto::TQuery> table(tx, DebugPath(source.Name));
        // Read only the columns listed in FIELDS_QUERIES().
        inputs.push_back(table.SelectFields(FIELDS_QUERIES()));
        tableConfig.push_back(source.PeriodBegin);
        LOG_INFO("reports, favorite/top/queries, input %s", source.Name.c_str());
    }
    LOG_INFO("reports, favorite/top/queries, output %s", cfg.TABLE_REPORTS_TOP_QUERIES.c_str());

    // Merge step. Both outputs share TopUrlReportOutputTag; the reducer addresses them
    // by offset (0 = top queries, 1 = favorite queries), matching the order below.
    // The geobase file is shipped with the job (presumably used by TRegionsLimiter — TODO confirm).
    TReduceCmd<TFavoriteAndTopQueriesMergeReducer>(tx, new TFavoriteAndTopQueriesMergeReducer(webmasterHosts, tableConfig))
        .Input(TTable<NProto::TFavoriteQueriesSettings>(tx, cfg.TABLE_REPORTS_FAVORITE_QUERIES_SETTINGS), FavoriteQueriesSettingsInputTag)
        .Inputs(inputs, UserSessionsInputTag)
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_QUERIES), TopUrlReportOutputTag)
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_FAVORITE_QUERIES), TopUrlReportOutputTag)
        .AddLocalFile(TConfig::GEOBASE_FILE_LITE)
        .MaxRowWeight(128_MBs)
        .MemoryLimit(5_GBs)
        .ReduceBy({"Host", "CorrectedQuery"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    // Limit step: rewrites the top-queries table in place, reducing by key with rows
    // ordered by (key, subkey). Only the top-queries table goes through this step.
    TCombineReduceCmd<TRecordsLimitCombineReducer, TRecordsLimitCombineReducer>(
        tx,
        new TRecordsLimitCombineReducer(MAX_TOP_QUERIES_COUNT),
        new TRecordsLimitCombineReducer(MAX_TOP_QUERIES_COUNT)
    )
        .Input(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_QUERIES))
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_QUERIES))
        .MaxRowWeight(128_MBs)
        .ReduceBy({"key"})
        .SortBy({"key", "subkey"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    // Sort both report tables by (key, subkey); the two sorts are handed to DoParallel.
    DoParallel(
        TSortCmd<NProto::TTopUrlReport>(tx, TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_QUERIES))
            .MaxRowWeight(128_MBs)
            .By({"key", "subkey"})
            .OperationWeight(cfg.OPERATION_WEIGHT),
        TSortCmd<NProto::TTopUrlReport>(tx, TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_FAVORITE_QUERIES))
            .MaxRowWeight(128_MBs)
            .By({"key", "subkey"})
            .OperationWeight(cfg.OPERATION_WEIGHT)
    );

    targetFavoriteQueries.UpdateTarget(tx);
    targetTopQueries.UpdateTarget(tx);
    // NOTE(review): "done" is logged before the transaction is committed.
    LOG_INFO("reports, favorite/top/queries, done");

    tx->Commit();

    return 0;
}

} //namespace NReports
} //namespace NWebmaster
