#include <util/draft/date.h>
#include <util/digest/fnv.h>
#include <util/generic/size_literals.h>
#include <util/string/join.h>
#include <util/string/reverse.h>
#include <library/cpp/string_utils/url/url.h>

#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/processors/user_sessions/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/library/source_tables.h>
#include <wmconsole/version3/processors/user_sessions/protos/trends2.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "task_trends2.h"

using namespace NJupiter;

// Value passed to .OperationWeight() on every YT command built in this file.
#define OPERATION_WEIGHT 1.0f

namespace NWebmaster {
namespace NTrends2 {

// One Radar domain record: the domain name, its type and the set of
// thematics paths attached to it. Serializable via Y_SAVELOAD_DEFINE.
struct TRadarDomainInfo {
    Y_SAVELOAD_DEFINE(Domain, Type, Thematics)

    // Returns true only when the domain, the type and at least one
    // thematics entry are all present.
    bool Defined() const {
        const bool incomplete = Domain.empty() || Type.empty() || Thematics.empty();
        return !incomplete;
    }

public:
    TString Domain;              // domain name
    TString Type;                // domain type
    THashSet<TString> Thematics; // distinct thematics paths of the domain
};

// Two-level counter table: outer key is a thematics path, inner key is a domain type.
using TThematicsInfo = THashMap<TString, THashMap<TString, size_t>>; //(Thematics, Type) -> domains count

//ReduceBy key
//SortBy key, subkey
// Mapper that folds raw query rows into per-host counters.
//
// Input:  NYT::TYamr rows; the key is used as the host, the value is parsed as a
//         serialized proto::queries2::QueryMessage.
// Output: one NTrends2::NProto::THostCounters row per
//         (host, period, is_mobile, is_pad) group seen by this job.
//
// The period of a row is PeriodsConfig[<index of the input table the row came from>].
struct TQueriesMapper : public NYT::IMapper<NYT::TTableReader<NYT::TYamr>, NYT::TTableWriter<NTrends2::NProto::THostCounters>> {
    Y_SAVELOAD_JOB(PeriodsConfig)

    TQueriesMapper() = default;
    // periodsConfig: period timestamp of every input table, indexed by table index.
    TQueriesMapper(const TDeque<time_t> &periodsConfig)
        : PeriodsConfig(periodsConfig)
    {
    }

public:
    // Accumulates all input rows in memory and writes the aggregated rows once
    // the input is exhausted.
    void Do(TReader *input, TWriter *output) override {
        struct TCounters {
        public:
            size_t Clicks = 0;
            size_t Count = 0;   // number of position entries that contributed to CTR
            size_t Shows = 0;
            float CTR = 0.0f;   // sum of per-position clicks/shows ratios
        };

        using TIsPadCounters        = THashMap<bool, TCounters>;
        using TIsMobileCounters     = THashMap<bool, TIsPadCounters>;
        using TPeriodCounters       = THashMap<time_t, TIsMobileCounters>;
        using THostCounters         = THashMap<TString, TPeriodCounters>;

        THostCounters counters;
        for (; input->IsValid(); input->Next()) {
            const auto &row = input->GetRow();
            const TString host = row.GetKey();
            const time_t period = PeriodsConfig[input->GetTableIndex()];

            proto::queries2::QueryMessage msg;
            // The parse result is not checked: whatever was decoded is aggregated.
            Y_PROTOBUF_SUPPRESS_NODISCARD msg.ParseFromString(row.GetValue());
            for (int i = 0; i < msg.reports_by_region_size(); i++) {
                const auto &region = msg.reports_by_region(i);
                size_t clicks = 0;
                size_t shows = 0;
                float ctr = 0.0f;
                size_t count = 0;
                for (int p = 0; p < region.position_info_size(); p++) {
                    const auto &position = region.position_info(p);
                    clicks += position.clicks_count();
                    shows += position.shows_count();
                    // A position without shows contributes a zero ratio.
                    ctr += position.shows_count() > 0
                        ? static_cast<float>(position.clicks_count()) / static_cast<float>(position.shows_count())
                        : 0.0f;
                    count++;
                }

                auto &countersRef = counters
                    [host]
                    [period]
                    [region.is_mobile()]
                    [region.is_pad()]
                ;
                countersRef.Clicks += clicks;
                countersRef.Shows += shows;
                countersRef.CTR += ctr;
                countersRef.Count += count;
            }
        }

        NTrends2::NProto::THostCounters dstMsg;
        for (const auto &hostObj : counters) {
            dstMsg.SetHost(hostObj.first);
            for (const auto &timestampObj : hostObj.second) {
                dstMsg.SetTimestamp(timestampObj.first);
                for (const auto &isMobileObj : timestampObj.second) {
                    dstMsg.SetIsMobile(isMobileObj.first);
                    for (const auto &isPadObj : isMobileObj.second) {
                        const TCounters &total = isPadObj.second;
                        dstMsg.SetIsPad(isPadObj.first);
                        dstMsg.SetClicks(total.Clicks);
                        dstMsg.SetShows(total.Shows);
                        // A group built only from regions without position entries has
                        // Count == 0; emit a zero CTR instead of the NaN that 0/0 yields.
                        dstMsg.SetCTR(total.Count > 0
                            ? total.CTR / static_cast<float>(total.Count)
                            : 0.0f);
                        output->AddRow(dstMsg);
                    }
                }
            }
        }
    }

public:
    TDeque<time_t> PeriodsConfig; // period timestamp per input table index
};

REGISTER_MAPPER(TQueriesMapper)

//ReduceBy Host, Timestamp, IsPad, IsMobile
// Collapses all THostCounters rows of one key group into a single row:
// Clicks and Shows are summed, CTR becomes the arithmetic mean of the
// incoming CTR values. All other fields are taken from the first row.
struct TQueriesReducer : public NYT::IReducer<NYT::TTableReader<NTrends2::NProto::THostCounters>, NYT::TTableWriter<NTrends2::NProto::THostCounters>> {
public:
    void Do(TReader *input, TWriter *output) override {
        // The first row supplies every field that is not recomputed below.
        NTrends2::NProto::THostCounters merged = input->GetRow();

        size_t clicksTotal = 0;
        size_t showsTotal = 0;
        size_t rowsSeen = 0;
        float ctrSum = 0.0f;
        while (input->IsValid()) {
            const NTrends2::NProto::THostCounters &current = input->GetRow();
            clicksTotal += current.GetClicks();
            showsTotal += current.GetShows();
            ctrSum += current.GetCTR();
            ++rowsSeen;
            input->Next();
        }

        float meanCtr = 0.0f;
        if (rowsSeen > 0) {
            meanCtr = ctrSum / static_cast<float>(rowsSeen);
        }

        merged.SetClicks(clicksTotal);
        merged.SetShows(showsTotal);
        merged.SetCTR(meanCtr);
        output->AddRow(merged);
    }
};

REGISTER_REDUCER(TQueriesReducer)

//ReduceBy Host, Timestamp, IsPad, IsMobile
// Keeps exactly one row per key group: the row the reader is positioned on
// when Do() is entered. The remaining rows of the group are not read.
struct TQueriesMergeReducer : public NYT::IReducer<NYT::TTableReader<NTrends2::NProto::THostCounters>, NYT::TTableWriter<NTrends2::NProto::THostCounters>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const NTrends2::NProto::THostCounters &firstRow = input->GetRow();
        output->AddRow(firstRow);
    }
};

REGISTER_REDUCER(TQueriesMergeReducer)

//ReduceBy key
//SortBy key, subkey
// Mapper that turns per-host counters into per-category counters.
//
// Every input host is matched against the Radar domains through a trie keyed
// by reversed domain names; for each matching domain the host counters are
// added to every thematics path of that domain. One TRadarCategory row is
// written per (timestamp, is_pad, is_mobile, type, thematics) group.
struct TTrendsMapper : public NYT::IMapper<NYT::TTableReader<NTrends2::NProto::THostCounters>, NYT::TTableWriter<NTrends2::NProto::TRadarCategory>> {
    Y_SAVELOAD_JOB(DomainsInfo, DomainsTrieStream)

    TTrendsMapper() = default;
    // domainsInfo: Radar domains; trie values are used as indices into this deque.
    // domainsTrieStream: serialized compact trie over the reversed domain names.
    TTrendsMapper(const TDeque<TRadarDomainInfo> &domainsInfo, const TVector<char> &domainsTrieStream)
        : DomainsInfo(domainsInfo)
        , DomainsTrieStream(domainsTrieStream)
    {
    }

public:
    // Attaches the trie to the serialized bytes before any row is processed.
    void Start(TWriter* /*writer*/) override {
        const char *trieBytes = &DomainsTrieStream[0];
        const size_t trieLength = DomainsTrieStream.size();
        Trie.Init(trieBytes, trieLength);
    }

    void Do(TReader *input, TWriter *output) override {
        struct TCounter {
        public:
            size_t Clicks = 0;
            size_t Count = 0;   // number of host rows folded into this counter
            size_t Shows = 0;
            float CTR = 0.0f;   // sum of the CTR values of those rows
        };

        using TThematicsCounters    = THashMap<TString, TCounter>;
        using TTypeCounters         = THashMap<TString, TThematicsCounters>;
        using TIsMobileCounters     = THashMap<bool, TTypeCounters>;
        using TIsPadCounters        = THashMap<bool, TIsMobileCounters>;
        using TTimestampCounters    = THashMap<time_t, TIsPadCounters>;

        TTimestampCounters aggregated;
        while (input->IsValid()) {
            const auto &row = input->GetRow();
            const TString host = row.GetHost();

            // The trie stores reversed names, so walk it with the reversed host.
            TString reversedHost = host;
            ReverseInPlace(reversedHost);

            TPrefixIterator<TCompactTrie<char>> prefixIt = MakePrefixIterator(Trie, reversedHost.data(), reversedHost.size());
            for (; prefixIt; ++prefixIt) {
                size_t domainIdx = 0;
                prefixIt.GetValue(domainIdx);

                const TRadarDomainInfo &info = DomainsInfo[domainIdx];
                // A prefix hit alone is not accepted; the subdomain check decides.
                if (!NUtils::IsSubdomain(host, info.Domain)) {
                    continue;
                }

                for (const TString &thematics : info.Thematics) {
                    auto &slot = aggregated
                        [row.GetTimestamp()]
                        [row.GetIsPad()]
                        [row.GetIsMobile()]
                        [info.Type]
                        [thematics]
                    ;
                    slot.Clicks += row.GetClicks();
                    slot.Shows += row.GetShows();
                    slot.CTR += row.GetCTR();
                    slot.Count++;
                }
            }

            input->Next();
        }

        NTrends2::NProto::TRadarCategory outRow;
        for (const auto &byTimestamp : aggregated) {
            outRow.SetTimestamp(byTimestamp.first);
            for (const auto &byPad : byTimestamp.second) {
                outRow.SetIsPad(byPad.first);
                for (const auto &byMobile : byPad.second) {
                    outRow.SetIsMobile(byMobile.first);
                    for (const auto &byType : byMobile.second) {
                        outRow.SetType(byType.first);
                        for (const auto &byThematics : byType.second) {
                            const TCounter &total = byThematics.second;
                            outRow.SetThematics(byThematics.first);
                            outRow.SetClicks(total.Clicks);
                            outRow.SetShows(total.Shows);
                            outRow.SetCTR(total.CTR / static_cast<float>(total.Count));
                            output->AddRow(outRow);
                        }
                    }
                }
            }
        }
    }

public:
    TDeque<TRadarDomainInfo> DomainsInfo;
    TVector<char> DomainsTrieStream;
    TCompactTrie<char> Trie;
};

REGISTER_MAPPER(TTrendsMapper)

//ReduceBy Timestamp, Thematics, Type, IsPad, IsMobile
// Collapses all TRadarCategory rows of one key group into a single row:
// Clicks and Shows are summed, CTR becomes the arithmetic mean of the incoming
// CTR values and Domains is looked up in ThematicsInfo by (Thematics, Type).
struct TTrendsReducer : public NYT::IReducer<NYT::TTableReader<NTrends2::NProto::TRadarCategory>, NYT::TTableWriter<NTrends2::NProto::TRadarCategory>> {
    Y_SAVELOAD_JOB(ThematicsInfo)

    TTrendsReducer() = default;
    TTrendsReducer(const TThematicsInfo &thematicsInfo)
        : ThematicsInfo(thematicsInfo)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        // The first row supplies every field that is not recomputed below.
        NTrends2::NProto::TRadarCategory merged = input->GetRow();

        size_t clicksTotal = 0;
        size_t showsTotal = 0;
        size_t rowsSeen = 0;
        float ctrSum = 0.0f;
        while (input->IsValid()) {
            const NTrends2::NProto::TRadarCategory &current = input->GetRow();
            clicksTotal += current.GetClicks();
            showsTotal += current.GetShows();
            ctrSum += current.GetCTR();
            ++rowsSeen;
            input->Next();
        }

        merged.SetClicks(clicksTotal);
        merged.SetShows(showsTotal);

        // at() is used on both levels, so a missing (Thematics, Type) pair
        // is reported by the container rather than silently inserted.
        const auto &perType = ThematicsInfo.at(merged.GetThematics());
        merged.SetDomains(perType.at(merged.GetType()));

        float meanCtr = 0.0f;
        if (rowsSeen > 0) {
            meanCtr = ctrSum / static_cast<float>(rowsSeen);
        }
        merged.SetCTR(meanCtr);
        output->AddRow(merged);
    }

public:
    TThematicsInfo ThematicsInfo;
};

REGISTER_REDUCER(TTrendsReducer)

// Returns the full name of the Radar table with the greatest name among the
// tables that sit directly in TABLE_SOURCE_RADAR_ROOT and whose short name
// matches the dated-name pattern below.
// Throws yexception when no such table exists.
TString GetLatestRadarTable(NYT::IClientBasePtr client) {
    const auto &radarRoot = TConfig::CInstance().TABLE_SOURCE_RADAR_ROOT;

    TDeque<NYTUtils::TTableInfo> listing;
    NYTUtils::GetTableList(client, radarRoot, listing);

    TRegularExpression namePattern("^(\\d{4}-\\d{2}+-\\d{2}+)_\\d+$");
    TString newest;
    for (const NYTUtils::TTableInfo &candidate : listing) {
        const TString directory = NYTUtils::GetDirectoryName(candidate.Name);
        const TString shortName = NYTUtils::GetTableName(candidate.Name);

        // Only tables located directly in the Radar root are considered.
        const bool directChild = !(directory != radarRoot);
        if (!directChild) {
            continue;
        }

        TVector<TString> captured;
        const bool nameMatches = namePattern.GetMatches(shortName, captured) == 1;
        if (!nameMatches) {
            continue;
        }

        // Names are compared as plain strings; the greatest one wins.
        if (newest.empty() || candidate.Name > newest) {
            newest = candidate.Name;
        }
    }

    if (newest.empty()) {
        ythrow yexception() << "Radar table not found";
    }

    return newest;
}

// Reads the latest Radar table and fills two outputs:
//   domains       - one TRadarDomainInfo per row that has a domain, a type and
//                   at least one thematics path (appended, not cleared);
//   thematicsInfo - (thematics path, type) -> number of stored domains carrying
//                   that path (incremented, not cleared).
//
// Each inner list of the "thematics" column is expanded into all of its
// prefixes joined with '/', e.g. [a, b, c] yields "a", "a/b" and "a/b/c".
// Rows whose domain cannot be converted by NUtils::IDNHostToAscii are skipped.
void LoadRadarDomainsInfo(NYT::IClientBasePtr client, TDeque<TRadarDomainInfo> &domains, TThematicsInfo &thematicsInfo) {
    const TString latestRadarTable = GetLatestRadarTable(client);
    auto reader = client->CreateTableReader<NYT::TNode>(latestRadarTable);
    for (; reader->IsValid(); reader->Next()) {
        const NYT::TNode &row = reader->GetRow();
        const NYT::TNode &thematicsNode = row["thematics"];
        const TString &domain = row["domain"].AsString();

        TString idnDomain;
        if (!NUtils::IDNHostToAscii(domain, idnDomain)) {
            continue;
        }
        idnDomain.to_lower();

        TRadarDomainInfo rdi;
        rdi.Type = row["type"].AsString();
        rdi.Domain = idnDomain;
        for (const NYT::TNode &l1 : thematicsNode.AsList()) {
            TDeque<TString> thPath;
            for (const NYT::TNode &l2 : l1.AsList()) {
                thPath.push_back(l2.AsString());
                // The set keeps a path once even when several lists share a prefix.
                rdi.Thematics.insert(JoinSeq("/", thPath));
            }
        }

        if (!rdi.Defined()) {
            continue;
        }

        // Count the domain once per distinct path, and only when it is actually
        // stored, so the totals describe exactly the domains the caller receives.
        for (const TString &thematics : rdi.Thematics) {
            thematicsInfo[thematics][rdi.Type]++;
        }
        domains.push_back(rdi);
    }
}

// Adds the "Processed" column of every row of TABLE_TRENDS2_SNAPSHOT_LOG to
// snapshotLog. Does nothing when the table does not exist.
void LoadSnapshotLog(NYT::IClientBasePtr client, TSet<TString> &snapshotLog) {
    const TString processedColumn = "Processed";
    const auto &logTable = TConfig::CInstance().TABLE_TRENDS2_SNAPSHOT_LOG;

    if (client->Exists(logTable)) {
        auto reader = client->CreateTableReader<NYT::TNode>(logTable);
        while (reader->IsValid()) {
            snapshotLog.insert(reader->GetRow()[processedColumn].AsString());
            reader->Next();
        }
    }
}

// Writes snapshotLog to TABLE_TRENDS2_SNAPSHOT_LOG, one row per entry with the
// entry stored in the "Processed" column; the output path is marked as sorted
// by that column.
void UpdateSnapshotLog(NYT::IClientBasePtr client, const TSet<TString> &snapshotLog) {
    const TString processedColumn = "Processed";
    const NYT::TRichYPath logPath = NYT::TRichYPath(TConfig::CInstance().TABLE_TRENDS2_SNAPSHOT_LOG).SortedBy(processedColumn);

    auto writer = client->CreateTableWriter<NYT::TNode>(logPath);
    for (const TString &entry : snapshotLog) {
        writer->AddRow(NYT::TNode()(processedColumn, entry));
    }
    writer->Finish();
}

int TaskTrendsUpdateSnapshot(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_QUERIES);

    NYTUtils::CreatePath(client, cfg.TABLE_TRENDS2_ROOT);

    const ui32 USE_DAYS = 120;
    TDeque<TSourceTable> srcTables;
    LoadConvertedTables(client, cfg.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, srcTables, USE_DAYS);
    if (srcTables.size() > USE_DAYS) {
        srcTables.erase(srcTables.begin(), srcTables.begin() + (srcTables.size() - USE_DAYS));
    }

    TSet<TSourceTable> srcTablesSet(srcTables.begin(), srcTables.end());
    if (srcTablesSet.empty()) {
        LOG_INFO("trends2, there are no source tables");
        return 0;
    }

    TYtSourceTrigger trigger(client, cfg.TABLE_TRENDS2_SNAPSHOT);
    if (!trigger.NeedUpdate(srcTablesSet.rbegin()->Name)) {
        LOG_INFO("trends2, snapshot is already updated");
        return 0;
    }

    //const NYT::TSortColumns KEYS_RADAR_CATEGORY = {"Thematics", "Type", "IsPad", "IsMobile"};
    const NYT::TSortColumns KEYS_HOST_COUNTERS = {"Host", "Timestamp", "IsPad", "IsMobile"};
    LOG_INFO("trends2, updating snapshot");

    NYT::ITransactionPtr tx = client->StartTransaction();
    TSet<TString> snapshotLog;
    LoadSnapshotLog(tx, snapshotLog);

    TDeque<time_t> periodsConfig;
    TDeque<TTable<NYT::TYamr>> inputTables;
    for (auto &table : srcTablesSet) {
        const TString tableName = NYTUtils::GetTableName(table.Name);
        if (!snapshotLog.contains(tableName)) {
            inputTables.emplace_back(tx, table.Name);
            periodsConfig.emplace_back(table.PeriodBegin);
            snapshotLog.insert(tableName);
        }
    }

    if (inputTables.empty()) {
        LOG_INFO("trends2, no unprocessed tables");
        return 0;
    }

    TMapCombineReduceCmd<TQueriesMapper, TQueriesReducer, TQueriesReducer>(
        tx,
        new TQueriesMapper(periodsConfig),
        nullptr,
        new TQueriesReducer
    )
        .OperationWeight(OPERATION_WEIGHT)
        .Inputs(inputTables)
        .Output(TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_INTM))
        .MapperMemoryLimit(2_GBs)
        .ReduceBy(KEYS_HOST_COUNTERS)
        .Do()
    ;

    TSortCmd<NTrends2::NProto::THostCounters>(tx, TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_INTM))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_HOST_COUNTERS)
        .Do()
    ;

    if (!tx->Exists(cfg.TABLE_TRENDS2_SNAPSHOT)) {
        TSortCmd<NTrends2::NProto::THostCounters>(tx, TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_SNAPSHOT).PreCreate())
            .OperationWeight(OPERATION_WEIGHT)
            .By(KEYS_HOST_COUNTERS)
            .Do()
        ;
    }

    TReduceCmd<TQueriesMergeReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_INTM))
        .Input(TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_SNAPSHOT))
        .Output(TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_SNAPSHOT).AsSortedOutput(KEYS_HOST_COUNTERS))
        .ReduceBy(KEYS_HOST_COUNTERS)
        .Do()
    ;

    tx->Remove(cfg.TABLE_TRENDS2_INTM);
    UpdateSnapshotLog(tx, snapshotLog);
    trigger.Update(tx, srcTablesSet.rbegin()->Name);
    tx->Commit();
    LOG_INFO("trends2, updating snapshot - done");
    return 0;
}

void BuildRadarDomainsTrie(const TDeque<TRadarDomainInfo> &domains, TVector<char> &hostsTrieStream) {
    TCompactTrie<char>::TBuilder trieBuilder;
    TDeque<TString> hostsArray/*(domains.size())*/;

    for (const auto &rdi : domains) {
        TString rhost = rdi.Domain;
        ReverseInPlace(rhost);
        //Cout << hostsArray.size() << Endl;
        trieBuilder.Add(rhost, hostsArray.size());
        hostsArray.push_back(rdi.Domain);
    }

    //Cout << "DOMAINS.size() = " << domains.size() << Endl;

    TBufferStream data;
    trieBuilder.SaveAndDestroy(data);
    hostsTrieStream.assign(data.Buffer().Data(), data.Buffer().Data() + data.Buffer().Size());
}

int TaskTrendsUpdateTrends(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    const NYT::TSortColumns KEYS_RADAR_CATEGORY = {"Thematics", "Type", "Timestamp", "IsPad", "IsMobile"};

    NYT::IClientPtr client = NYT::CreateClient(cfg.MR_SERVER_HOST_QUERIES);
    TYtSourceTrigger triggerSnapshot(client, cfg.TABLE_TRENDS2_SNAPSHOT);
    TYtSourceTrigger triggerTrends(client, cfg.TABLE_TRENDS2_TRENDS);
    if (!triggerTrends.NeedUpdate(triggerSnapshot.Source)) {
        LOG_INFO("trends2, trends are already updated");
        return 0;
    }

    LOG_INFO("trends2, updating trends");
    NYT::ITransactionPtr tx = client->StartTransaction();

    TDeque<TRadarDomainInfo> domains;
    TThematicsInfo thematicsInfo;
    LoadRadarDomainsInfo(tx, domains, thematicsInfo);

    TVector<char> domainsTrieStream;
    BuildRadarDomainsTrie(domains, domainsTrieStream);

    TMapCombineReduceCmd<TTrendsMapper, TTrendsReducer, TTrendsReducer>(
        tx,
        new TTrendsMapper(domains, domainsTrieStream),
        nullptr,
        new TTrendsReducer(thematicsInfo)
    )
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NTrends2::NProto::THostCounters>(tx, cfg.TABLE_TRENDS2_SNAPSHOT))
        .Output(TTable<NTrends2::NProto::TRadarCategory>(tx, cfg.TABLE_TRENDS2_TRENDS))
        .ReduceBy(KEYS_RADAR_CATEGORY)
        .Do()
    ;

    TSortCmd<NTrends2::NProto::TRadarCategory>(tx, TTable<NTrends2::NProto::TRadarCategory>(tx, cfg.TABLE_TRENDS2_TRENDS))
        .OperationWeight(OPERATION_WEIGHT)
        .By(KEYS_RADAR_CATEGORY)
        .Do()
    ;

    triggerTrends.Update(tx, triggerSnapshot.Source);
    tx->Commit();
    LOG_INFO("trends2, updating trends - done");
    return 0;
}

} //namespace NTrends2
} //namespace NWebmaster
