#include <util/generic/set.h>
#include <util/generic/size_literals.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/library/compact_counters.h>
#include <wmconsole/version3/processors/user_sessions/library/regions_limiter.h>
#include <wmconsole/version3/processors/user_sessions/library/source_tables.h>
#include <wmconsole/version3/processors/user_sessions/protos/reports.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/protos/querygroups.pb.h>
#include <wmconsole/version3/wmcutil/compress.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "limits.h"
#include "task_hosts.h"
#include "task_urls.h"

namespace NWebmaster {
namespace NReports {

using namespace NJupiter;

// Typed table tags for the tagged reduce stage of this task.
// The numeric constructor argument is presumably the tag identifier (tag library semantics
// are not visible here - confirm); the values used in this file are all distinct.
//
// Input tags: read by TFavoriteAndTopUrlsMergeReducer and bound to input tables in TaskFavoriteAndTopUrls.
static const TInputTag<NProto::TFavoriteUrlsSettings> FavoriteUrlsSettingsInputTag  (1);
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionsInputTag          (2);

// Output tag shared by both report tables (top urls and favorite urls) of the merge reducer.
static const TOutputTag<NProto::TTopUrlReport> TopUrlReportOutputTag                (3);
// NOTE(review): YamrOutputTag is not referenced in the visible part of this file.
static const TOutputTag<NYT::TYamr> YamrOutputTag                                   (4);

// Holder for attribute-name string constants.
struct TAttrName {
    // Attribute name literal "timestamps".
    static constexpr const char Timestamps[] = "timestamps";
};

// Column list passed to SelectFields() for every user-sessions source table.
// The list is a function-local static, so it is built on the first call and
// the same object is returned by reference afterwards.
const NYT::TSortColumns & FIELDS_URLS() {
    static const NYT::TSortColumns selectedColumns = {
        "Host", "Path",
        "IsMobile", "IsPad",
        "RegionId", "Position",
        "Clicks", "Shows",
        "RequestSource", "ResultSource",
        "IsMetrikaRobot",
        "Cm2",
        "Timestamp",
        "UIDFraudRank",
    };

    return selectedColumns;
}

// Map stage of the top urls pipeline.
//
// Keeps only rows whose Position is below 50 and whose Host is present in WebmasterHosts,
// and overwrites Timestamp of every emitted row with the value configured for the
// input table the row came from (TableConfig[input table index]).
struct TTopUrlsMergeMapper : public NYT::IMapper<NYT::TTableReader<NUserSessions::NProto::TQuery>, NYT::TTableWriter<NUserSessions::NProto::TQuery>> {
    Y_SAVELOAD_JOB(WebmasterHosts, TableConfig)

public:
    TTopUrlsMergeMapper() = default;

    // webmasterHosts - hosts whose rows are kept;
    // tableConfig    - one timestamp per input table, indexed by the input table index.
    TTopUrlsMergeMapper(const THashSet<TString> &webmasterHosts, const TVector<time_t> &tableConfig)
        : WebmasterHosts(webmasterHosts)
        , TableConfig(tableConfig)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            // Filter on the reader's own row first: the protobuf copy below is only
            // paid for rows that are actually emitted, not for every input row.
            const NUserSessions::NProto::TQuery &srcRow = input->GetRow();
            if (srcRow.GetPosition() >= 50) {
                continue;
            }

            if (!WebmasterHosts.contains(srcRow.GetHost())) {
                continue;
            }

            // A mutable copy is needed because the timestamp is patched before writing.
            NUserSessions::NProto::TQuery row = srcRow;
            // at() is bounds-checked: a table index without a configured timestamp fails loudly.
            row.SetTimestamp(TableConfig.at(input->GetTableIndex()));
            output->AddRow(row);
        }
    }

public:
    THashSet<TString> WebmasterHosts;
    TVector<time_t> TableConfig;

}; //TTopUrlsMergeMapper

REGISTER_MAPPER(TTopUrlsMergeMapper)

// Collapses all rows of one reduce key into a single row: the first row of the key
// with Clicks and Shows replaced by the sums over the whole key.
// In TaskFavoriteAndTopUrls it is used both as combiner and as reducer with
// ReduceBy Host, Path, IsMobile, IsPad, RegionId, Position, Timestamp.
struct TTopUrlsPrepareReducer : public NYT::IReducer<NYT::TTableReader<NUserSessions::NProto::TQuery>, NYT::TTableWriter<NUserSessions::NProto::TQuery>> {
    void Do(TReader *input, TWriter *output) override {
        // The first row supplies every field except the two summed counters.
        NUserSessions::NProto::TQuery aggregated = input->GetRow();

        size_t totalClicks = 0;
        size_t totalShows = 0;
        while (input->IsValid()) {
            const auto &current = input->GetRow();
            totalClicks += current.GetClicks();
            totalShows += current.GetShows();
            input->Next();
        }

        aggregated.SetClicks(totalClicks);
        aggregated.SetShows(totalShows);
        output->AddRow(aggregated);
    }
}; //TTopUrlsPrepareReducer

REGISTER_REDUCER(TTopUrlsPrepareReducer)

// ReduceBy Host, Path
//
// Joins the favorite urls settings (FavoriteUrlsSettingsInputTag) with the aggregated
// user-session rows (UserSessionsInputTag) of one (Host, Path) key and emits one
// TTopUrlReport row per key:
//   - always to the output at offset TABLENO_TOP_URLS (0);
//   - additionally to the output at offset TABLENO_FAVORITE_URLS (1) when a settings row
//     exists for the key.
// The offsets follow the order of the .Output(...) calls in TaskFavoriteAndTopUrls.
struct TFavoriteAndTopUrlsMergeReducer : public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // Base value for the inverted counters stored in the subkey (see below).
        const ui32 MAX_RECORDS_COUNT        = Max<ui32>();
        const ui32 TABLENO_TOP_URLS         = 0;
        const ui32 TABLENO_FAVORITE_URLS    = 1;

        // Remember whether the key has a favorite settings row, then move past the settings rows.
        TMaybe<NProto::TFavoriteUrlsSettings> favoriteSettingsFilter = reader.GetSingleRowMaybe(FavoriteUrlsSettingsInputTag);
        reader.SkipRows(FavoriteUrlsSettingsInputTag);
        // Presumably means the key had settings rows only and no user-session rows - confirm
        // against the tagged reader implementation.
        if (!reader.IsValid()) {
            return;
        }

        TMaybe<NUserSessions::NProto::TQuery> firstRow = reader.GetRowMaybe(UserSessionsInputTag);
        if (!firstRow.Defined()) {
            return;
        }

        // Host, Path and Cm2 are taken from the first user-session row of the key.
        const TString host = firstRow.GetRef().GetHost();
        const TString path = firstRow.GetRef().GetPath();
        const double cm2 = firstRow.GetRef().GetCm2();

        proto::queries2::QueryMessage msgDst;
        msgDst.set_url(host + path);
        msgDst.set_cm2(cm2);

        TRegionsLimiter limiter;
        NUserSessions::TCompactCounters counters;
        for (const auto &row : reader.GetRows(UserSessionsInputTag)) {
            // Same position cut-off as in TTopUrlsMergeMapper.
            if (row.GetPosition() >= 50) {
                continue;
            }

            // Register the region with its shows; whenever the limiter asks for it, compact
            // the limiter and re-map the already collected counters accordingly.
            // NOTE(review): presumably this bounds the number of distinct regions kept per url - confirm.
            limiter.Add(row.GetRegionId(), row.GetShows());
            if (limiter.NeedCompaction()) {
                limiter.Compact();
                counters.Compact(limiter);
            }
            const ui32 actualRegionId = limiter.GetActualId(row.GetRegionId());
            // Timestamp was overwritten by TTopUrlsMergeMapper with the timestamp of the source table.
            counters.AddPatchedRegionId(row, row.GetTimestamp(), actualRegionId);
        }

        // Nothing was counted for this key (e.g. every row was filtered out): emit nothing.
        if (counters.Counters.empty()) {
            return;
        }

        // Final compaction before serialization.
        limiter.Compact();
        counters.Compact(limiter);

        TString stream;
        counters.Write(msgDst);
        Y_PROTOBUF_SUPPRESS_NODISCARD msgDst.SerializeToString(&stream);

        // Only payloads above 127 MiB are compressed, so Value may hold either form.
        // NOTE(review): presumably this keeps the row under the 128 MB MaxRowWeight set on the operation - confirm.
        if (stream.size() > 127 * 1024 * 1024) {
            NUtils::Compress(stream);
        }

        NProto::TTopUrlReport dstRow;

        // NOTE(review): the types of counters.Total.* are not visible here; if they are wider
        // than ui32 these assignments narrow the totals - confirm.
        const ui32 showsCount = counters.Total.Shows;
        const ui32 clicksCount = counters.Total.Clicks;
        dstRow.SetKey(host);
        // Subkey holds the inverted totals (Max<ui32>() - value) as fixed-width hex, so a larger
        // clicks total gives a lexicographically smaller subkey, with shows breaking ties.
        dstRow.SetSubkey(Sprintf("%08X-%08X", MAX_RECORDS_COUNT - clicksCount, MAX_RECORDS_COUNT - showsCount));
        dstRow.SetValue(stream);
        dstRow.SetPath(path);
        dstRow.SetShows(showsCount);
        dstRow.SetClicks(clicksCount);
        writer.AddRowWithOffset(dstRow, TopUrlReportOutputTag, TABLENO_TOP_URLS);

        // The same row also goes to the favorite urls table when the key has a settings row.
        if (favoriteSettingsFilter.Defined()) {
            writer.AddRowWithOffset(dstRow, TopUrlReportOutputTag, TABLENO_FAVORITE_URLS);
        }
    }

}; //TFavoriteAndTopUrlsMergeReducer

REGISTER_REDUCER(TFavoriteAndTopUrlsMergeReducer)

// Map stage of the favorite urls settings update.
//
// Parses the value of every TYamr row as a HostGroupInfo message and, for groups flagged
// as favorite, emits one TFavoriteUrlsSettings row per url of the group
// (Host = key of the source row, Path = path of the url).
struct TPrepareFavoriteUrlsSettingsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NProto::TFavoriteUrlsSettings>> {
    void Do(TReader *input, TWriter *output) override {
        // Reused for every emitted row; Host and Path are both rewritten before each AddRow.
        NProto::TFavoriteUrlsSettings settingsRow;

        for (; input->IsValid(); input->Next()) {
            const NYT::TYamr &srcRow = input->GetRow();

            // The parse result is deliberately ignored, as in the rest of this file.
            proto::querygroups::HostGroupInfo hostGroup;
            Y_PROTOBUF_SUPPRESS_NODISCARD hostGroup.ParseFromString(srcRow.GetValue());

            if (!hostGroup.favorite()) {
                continue;
            }

            settingsRow.SetHost(srcRow.GetKey());
            for (const auto &url : hostGroup.urls()) {
                settingsRow.SetPath(url.path());
                output->AddRow(settingsRow);
            }
        }
    }
}; //TPrepareFavoriteUrlsSettingsMapper

REGISTER_MAPPER(TPrepareFavoriteUrlsSettingsMapper)

// ReduceBy Host, Path
// Deduplicates settings rows: only the first row of every (Host, Path) key is written,
// the remaining rows of the key are not read.
struct TPrepareFavoriteUrlsSettingsReducer : public NYT::IReducer<NYT::TTableReader<NProto::TFavoriteUrlsSettings>, NYT::TTableWriter<NProto::TFavoriteUrlsSettings>> {
    void Do(TReader *input, TWriter *output) override {
        const NProto::TFavoriteUrlsSettings &representative = input->GetRow();
        output->AddRow(representative);
    }
};

REGISTER_REDUCER(TPrepareFavoriteUrlsSettingsReducer)

// Wraps a table name into a rich path. Currently a pass-through; the commented-out
// line shows how to narrow the read to a single key while debugging.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://www.drive2.ru"))));
    return richPath;
}

// Rebuilds the favorite urls settings table.
//
// Runs a map + combine + reduce over TABLE_SOURCE_GROUPS_SETTINGS
// (TPrepareFavoriteUrlsSettingsMapper, then TPrepareFavoriteUrlsSettingsReducer as both
// combiner and reducer, keyed by Host, Path) into TABLE_REPORTS_FAVORITE_URLS_SETTINGS,
// and then sorts the result by (Host, Path).
//
// client - YT client or transaction the operations are started from.
void UpdateFavoriteUrlsSettings(NYT::IClientBasePtr client) {
    LOG_INFO("reports, favorite_urls, update settings");

    const auto &cfg = TConfig::CInstance();

    TMapCombineReduceCmd<
        TPrepareFavoriteUrlsSettingsMapper,
        TPrepareFavoriteUrlsSettingsReducer,
        TPrepareFavoriteUrlsSettingsReducer>
    (
        client
    )
        .Input(TTable<NYT::TYamr>(client, cfg.TABLE_SOURCE_GROUPS_SETTINGS))
        .Output(TTable<NProto::TFavoriteUrlsSettings>(client, cfg.TABLE_REPORTS_FAVORITE_URLS_SETTINGS))
        .ReduceBy({"Host", "Path"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    TSortCmd<NProto::TFavoriteUrlsSettings>(client,
        TTable<NProto::TFavoriteUrlsSettings>(client, cfg.TABLE_REPORTS_FAVORITE_URLS_SETTINGS)
    )
        .By({"Host", "Path"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    // Distinct completion message: the start and the end of the update must be
    // distinguishable in the log.
    LOG_INFO("reports, favorite_urls, update settings, done");
}

int TaskFavoriteAndTopUrls(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    const int PERIOD_DAYS_TOPS = 84; // 7 days * 12 weeks
    const int MAX_TOP_URLS_COUNT = 3000;

    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    TSourceTablesForTarget targetTopUrls            (client, PERIOD_DAYS_TOPS, cfg.TABLE_REPORTS_TOP_URLS);
    TSourceTablesForTarget targetFavoriteUrls       (client, targetTopUrls, cfg.TABLE_REPORTS_FAVORITE_URLS);

    if (targetTopUrls.IsUpdated && targetFavoriteUrls.IsUpdated) {
        LOG_INFO("reports, favorite/top/urls, are already updated");
        return 0;
    }

    if (!targetTopUrls.IsComplete) {
        LOG_ERROR("reports, source table set is incomplete");
        return 1;
    }

    THashSet<TString> webmasterHosts = { };
    LoadWebmasterHosts(client, webmasterHosts);

    NYTUtils::CreatePath(client, TConfig::CInstance().TABLE_REPORTS_ROOT);
    NYT::ITransactionPtr tx = client->StartTransaction();
    UpdateFavoriteUrlsSettings(tx);

    TVector<time_t> tableConfig; //= { -1 }; // the stub for the hosts table
    TVector<TTable<NUserSessions::NProto::TQuery>> inputs;
    for (const auto &source : targetTopUrls.Tables) {
        TTable<NUserSessions::NProto::TQuery> table(tx, DebugPath(source.Name));
        inputs.push_back(table.SelectFields(FIELDS_URLS()));
        tableConfig.push_back(source.PeriodBegin);
        LOG_INFO("reports, favorite/top/urls, input %s", source.Name.c_str());
    }
    LOG_INFO("reports, favorite/top/urls, output %s", cfg.TABLE_REPORTS_TOP_URLS.c_str());

    const TString TABLE_REPORTS_TOP_URLS_INTM = cfg.TABLE_REPORTS_TOP_URLS + "_intm";

    TMapCombineReduceCmd<TTopUrlsMergeMapper, TTopUrlsPrepareReducer, TTopUrlsPrepareReducer>(
        tx,
        new TTopUrlsMergeMapper(webmasterHosts, tableConfig),
        new TTopUrlsPrepareReducer,
        new TTopUrlsPrepareReducer
    )
        .Inputs(inputs)
        .Output(TTable<NUserSessions::NProto::TQuery>(tx, TABLE_REPORTS_TOP_URLS_INTM))
        .MaxRowWeight(128_MBs)
        .MapperMemoryLimit(3_GBs)
        .ReduceBy({"Host", "Path", "IsMobile", "IsPad", "RegionId", "Position", "Timestamp"})
        .OperationWeight(TConfig::CInstance().OPERATION_WEIGHT)
        .Do()
    ;

    TSortCmd<NUserSessions::NProto::TQuery>(tx,
        TTable<NUserSessions::NProto::TQuery>(tx, TABLE_REPORTS_TOP_URLS_INTM)
    )
        .By({"Host", "Path"})
        .OperationWeight(TConfig::CInstance().OPERATION_WEIGHT)
        .MaxRowWeight(128_MBs)
        .Do()
    ;

    TReduceCmd<TFavoriteAndTopUrlsMergeReducer>(tx)
        .Input(TTable<NProto::TFavoriteUrlsSettings>(tx, cfg.TABLE_REPORTS_FAVORITE_URLS_SETTINGS), FavoriteUrlsSettingsInputTag)
        .Input(TTable<NUserSessions::NProto::TQuery>(tx, TABLE_REPORTS_TOP_URLS_INTM), UserSessionsInputTag)
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_URLS), TopUrlReportOutputTag)
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_FAVORITE_URLS), TopUrlReportOutputTag)
        .MaxRowWeight(128_MBs)
        .AddLocalFile(TConfig::GEOBASE_FILE_LITE)
        .MemoryLimit(7_GBs)
        .ReduceBy({"Host", "Path"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    tx->Remove(TABLE_REPORTS_TOP_URLS_INTM);

    TCombineReduceCmd<TRecordsLimitCombineReducer, TRecordsLimitCombineReducer>(
        tx,
        new TRecordsLimitCombineReducer(MAX_TOP_URLS_COUNT),
        new TRecordsLimitCombineReducer(MAX_TOP_URLS_COUNT)
    )
        .Input(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_URLS))
        .Output(TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_URLS))
        .MaxRowWeight(128_MBs)
        .ReduceBy({"key"})
        .SortBy({"key", "subkey"})
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Do()
    ;

    DoParallel(
        TSortCmd<NProto::TTopUrlReport>(tx, TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_TOP_URLS))
            .By({"key", "subkey"})
            .MaxRowWeight(128_MBs)
            .OperationWeight(cfg.OPERATION_WEIGHT),
        TSortCmd<NProto::TTopUrlReport>(tx, TTable<NProto::TTopUrlReport>(tx, cfg.TABLE_REPORTS_FAVORITE_URLS))
            .By({"key", "subkey"})
            .MaxRowWeight(128_MBs)
            .OperationWeight(cfg.OPERATION_WEIGHT)
    );

    targetFavoriteUrls.UpdateTarget(tx);
    targetTopUrls.UpdateTarget(tx);
    LOG_INFO("reports, favorite/top/urls, done");

    tx->Commit();

    return 0;
}

} //namespace NReports
} //namespace NWebmaster
